Merge tag 'v3.5-rc7' into late/soc
[firefly-linux-kernel-4.4.55.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50
51 #include <asm/uaccess.h>
52
53 #include <net/ip_vs.h>
54
/*
 * Mutex for IPVS sockopts; a sleeping lock is used because
 * [gs]etsockopt may sleep.
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* Read-write lock protecting the service tables */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
60
61 /* sysctl variables */
62
63 #ifdef CONFIG_IP_VS_DEBUG
/* Current IPVS debug level; as a static it starts out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
        return sysctl_ip_vs_debug_level;
}
70 #endif
71
72
/* Forward declarations */
static void __ip_vs_del_service(struct ip_vs_service *svc);
75
76
77 #ifdef CONFIG_IP_VS_IPV6
78 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
79 static bool __ip_vs_addr_is_local_v6(struct net *net,
80                                      const struct in6_addr *addr)
81 {
82         struct flowi6 fl6 = {
83                 .daddr = *addr,
84         };
85         struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
86         bool is_local;
87
88         is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
89
90         dst_release(dst);
91         return is_local;
92 }
93 #endif
94
95 #ifdef CONFIG_SYSCTL
96 /*
97  *      update_defense_level is called from keventd and from sysctl,
98  *      so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102         struct sysinfo i;
103         static int old_secure_tcp = 0;
104         int availmem;
105         int nomem;
106         int to_change = -1;
107
108         /* we only count free and buffered memory (in pages) */
109         si_meminfo(&i);
110         availmem = i.freeram + i.bufferram;
111         /* however in linux 2.5 the i.bufferram is total page cache size,
112            we need adjust it */
113         /* si_swapinfo(&i); */
114         /* availmem = availmem - (i.totalswap - i.freeswap); */
115
116         nomem = (availmem < ipvs->sysctl_amemthresh);
117
118         local_bh_disable();
119
120         /* drop_entry */
121         spin_lock(&ipvs->dropentry_lock);
122         switch (ipvs->sysctl_drop_entry) {
123         case 0:
124                 atomic_set(&ipvs->dropentry, 0);
125                 break;
126         case 1:
127                 if (nomem) {
128                         atomic_set(&ipvs->dropentry, 1);
129                         ipvs->sysctl_drop_entry = 2;
130                 } else {
131                         atomic_set(&ipvs->dropentry, 0);
132                 }
133                 break;
134         case 2:
135                 if (nomem) {
136                         atomic_set(&ipvs->dropentry, 1);
137                 } else {
138                         atomic_set(&ipvs->dropentry, 0);
139                         ipvs->sysctl_drop_entry = 1;
140                 };
141                 break;
142         case 3:
143                 atomic_set(&ipvs->dropentry, 1);
144                 break;
145         }
146         spin_unlock(&ipvs->dropentry_lock);
147
148         /* drop_packet */
149         spin_lock(&ipvs->droppacket_lock);
150         switch (ipvs->sysctl_drop_packet) {
151         case 0:
152                 ipvs->drop_rate = 0;
153                 break;
154         case 1:
155                 if (nomem) {
156                         ipvs->drop_rate = ipvs->drop_counter
157                                 = ipvs->sysctl_amemthresh /
158                                 (ipvs->sysctl_amemthresh-availmem);
159                         ipvs->sysctl_drop_packet = 2;
160                 } else {
161                         ipvs->drop_rate = 0;
162                 }
163                 break;
164         case 2:
165                 if (nomem) {
166                         ipvs->drop_rate = ipvs->drop_counter
167                                 = ipvs->sysctl_amemthresh /
168                                 (ipvs->sysctl_amemthresh-availmem);
169                 } else {
170                         ipvs->drop_rate = 0;
171                         ipvs->sysctl_drop_packet = 1;
172                 }
173                 break;
174         case 3:
175                 ipvs->drop_rate = ipvs->sysctl_am_droprate;
176                 break;
177         }
178         spin_unlock(&ipvs->droppacket_lock);
179
180         /* secure_tcp */
181         spin_lock(&ipvs->securetcp_lock);
182         switch (ipvs->sysctl_secure_tcp) {
183         case 0:
184                 if (old_secure_tcp >= 2)
185                         to_change = 0;
186                 break;
187         case 1:
188                 if (nomem) {
189                         if (old_secure_tcp < 2)
190                                 to_change = 1;
191                         ipvs->sysctl_secure_tcp = 2;
192                 } else {
193                         if (old_secure_tcp >= 2)
194                                 to_change = 0;
195                 }
196                 break;
197         case 2:
198                 if (nomem) {
199                         if (old_secure_tcp < 2)
200                                 to_change = 1;
201                 } else {
202                         if (old_secure_tcp >= 2)
203                                 to_change = 0;
204                         ipvs->sysctl_secure_tcp = 1;
205                 }
206                 break;
207         case 3:
208                 if (old_secure_tcp < 2)
209                         to_change = 1;
210                 break;
211         }
212         old_secure_tcp = ipvs->sysctl_secure_tcp;
213         if (to_change >= 0)
214                 ip_vs_protocol_timeout_change(ipvs,
215                                               ipvs->sysctl_secure_tcp > 1);
216         spin_unlock(&ipvs->securetcp_lock);
217
218         local_bh_enable();
219 }
220
221
222 /*
223  *      Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD    1*HZ
226
227 static void defense_work_handler(struct work_struct *work)
228 {
229         struct netns_ipvs *ipvs =
230                 container_of(work, struct netns_ipvs, defense_work.work);
231
232         update_defense_level(ipvs);
233         if (atomic_read(&ipvs->dropentry))
234                 ip_vs_random_dropentry(ipvs->net);
235         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238
239 int
240 ip_vs_use_count_inc(void)
241 {
242         return try_module_get(THIS_MODULE);
243 }
244
245 void
246 ip_vs_use_count_dec(void)
247 {
248         module_put(THIS_MODULE);
249 }
250
251
/*
 *      Hash table: for virtual service lookups
 *
 *      Both tables have IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
 *      buckets; IP_VS_SVC_TAB_MASK reduces a hash value to a bucket index.
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <netns, protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by <netns, fwmark> */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263
264
265 /*
266  *      Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270                   const union nf_inet_addr *addr, __be16 port)
271 {
272         register unsigned int porth = ntohs(port);
273         __be32 addr_fold = addr->ip;
274
275 #ifdef CONFIG_IP_VS_IPV6
276         if (af == AF_INET6)
277                 addr_fold = addr->ip6[0]^addr->ip6[1]^
278                             addr->ip6[2]^addr->ip6[3];
279 #endif
280         addr_fold ^= ((size_t)net>>8);
281
282         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
283                 & IP_VS_SVC_TAB_MASK;
284 }
285
286 /*
287  *      Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291         return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293
294 /*
295  *      Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  *      or in the ip_vs_svc_fwm_table by fwmark.
297  *      Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301         unsigned int hash;
302
303         if (svc->flags & IP_VS_SVC_F_HASHED) {
304                 pr_err("%s(): request for already hashed, called from %pF\n",
305                        __func__, __builtin_return_address(0));
306                 return 0;
307         }
308
309         if (svc->fwmark == 0) {
310                 /*
311                  *  Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312                  */
313                 hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314                                          &svc->addr, svc->port);
315                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316         } else {
317                 /*
318                  *  Hash it by fwmark in svc_fwm_table
319                  */
320                 hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322         }
323
324         svc->flags |= IP_VS_SVC_F_HASHED;
325         /* increase its refcnt because it is referenced by the svc table */
326         atomic_inc(&svc->refcnt);
327         return 1;
328 }
329
330
331 /*
332  *      Unhashes a service from svc_table / svc_fwm_table.
333  *      Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338                 pr_err("%s(): request for unhash flagged, called from %pF\n",
339                        __func__, __builtin_return_address(0));
340                 return 0;
341         }
342
343         if (svc->fwmark == 0) {
344                 /* Remove it from the svc_table table */
345                 list_del(&svc->s_list);
346         } else {
347                 /* Remove it from the svc_fwm_table table */
348                 list_del(&svc->f_list);
349         }
350
351         svc->flags &= ~IP_VS_SVC_F_HASHED;
352         atomic_dec(&svc->refcnt);
353         return 1;
354 }
355
356
357 /*
358  *      Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362                      const union nf_inet_addr *vaddr, __be16 vport)
363 {
364         unsigned int hash;
365         struct ip_vs_service *svc;
366
367         /* Check for "full" addressed entries */
368         hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369
370         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371                 if ((svc->af == af)
372                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
373                     && (svc->port == vport)
374                     && (svc->protocol == protocol)
375                     && net_eq(svc->net, net)) {
376                         /* HIT */
377                         return svc;
378                 }
379         }
380
381         return NULL;
382 }
383
384
385 /*
386  *      Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391         unsigned int hash;
392         struct ip_vs_service *svc;
393
394         /* Check for fwmark addressed entries */
395         hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396
397         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398                 if (svc->fwmark == fwmark && svc->af == af
399                     && net_eq(svc->net, net)) {
400                         /* HIT */
401                         return svc;
402                 }
403         }
404
405         return NULL;
406 }
407
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410                   const union nf_inet_addr *vaddr, __be16 vport)
411 {
412         struct ip_vs_service *svc;
413         struct netns_ipvs *ipvs = net_ipvs(net);
414
415         read_lock(&__ip_vs_svc_lock);
416
417         /*
418          *      Check the table hashed by fwmark first
419          */
420         if (fwmark) {
421                 svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422                 if (svc)
423                         goto out;
424         }
425
426         /*
427          *      Check the table hashed by <protocol,addr,port>
428          *      for "full" addressed entries
429          */
430         svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431
432         if (svc == NULL
433             && protocol == IPPROTO_TCP
434             && atomic_read(&ipvs->ftpsvc_counter)
435             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436                 /*
437                  * Check if ftp service entry exists, the packet
438                  * might belong to FTP data connections.
439                  */
440                 svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441         }
442
443         if (svc == NULL
444             && atomic_read(&ipvs->nullsvc_counter)) {
445                 /*
446                  * Check if the catch-all port (port zero) exists
447                  */
448                 svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449         }
450
451   out:
452         if (svc)
453                 atomic_inc(&svc->usecnt);
454         read_unlock(&__ip_vs_svc_lock);
455
456         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457                       fwmark, ip_vs_proto_name(protocol),
458                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459                       svc ? "hit" : "not hit");
460
461         return svc;
462 }
463
464
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468         atomic_inc(&svc->refcnt);
469         dest->svc = svc;
470 }
471
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475         struct ip_vs_service *svc = dest->svc;
476
477         dest->svc = NULL;
478         if (atomic_dec_and_test(&svc->refcnt)) {
479                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480                               svc->fwmark,
481                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
482                               ntohs(svc->port), atomic_read(&svc->usecnt));
483                 free_percpu(svc->stats.cpustats);
484                 kfree(svc);
485         }
486 }
487
488
489 /*
490  *      Returns hash value for real service
491  */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493                                             const union nf_inet_addr *addr,
494                                             __be16 port)
495 {
496         register unsigned int porth = ntohs(port);
497         __be32 addr_fold = addr->ip;
498
499 #ifdef CONFIG_IP_VS_IPV6
500         if (af == AF_INET6)
501                 addr_fold = addr->ip6[0]^addr->ip6[1]^
502                             addr->ip6[2]^addr->ip6[3];
503 #endif
504
505         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506                 & IP_VS_RTAB_MASK;
507 }
508
509 /*
510  *      Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  *      should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515         unsigned int hash;
516
517         if (!list_empty(&dest->d_list)) {
518                 return 0;
519         }
520
521         /*
522          *      Hash by proto,addr,port,
523          *      which are the parameters of the real service.
524          */
525         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526
527         list_add(&dest->d_list, &ipvs->rs_table[hash]);
528
529         return 1;
530 }
531
532 /*
533  *      UNhashes ip_vs_dest from rs_table.
534  *      should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538         /*
539          * Remove it from the rs_table table.
540          */
541         if (!list_empty(&dest->d_list)) {
542                 list_del(&dest->d_list);
543                 INIT_LIST_HEAD(&dest->d_list);
544         }
545
546         return 1;
547 }
548
549 /*
550  *      Lookup real service by <proto,addr,port> in the real service table.
551  */
552 struct ip_vs_dest *
553 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
554                           const union nf_inet_addr *daddr,
555                           __be16 dport)
556 {
557         struct netns_ipvs *ipvs = net_ipvs(net);
558         unsigned int hash;
559         struct ip_vs_dest *dest;
560
561         /*
562          *      Check for "full" addressed entries
563          *      Return the first found entry
564          */
565         hash = ip_vs_rs_hashkey(af, daddr, dport);
566
567         read_lock(&ipvs->rs_lock);
568         list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
569                 if ((dest->af == af)
570                     && ip_vs_addr_equal(af, &dest->addr, daddr)
571                     && (dest->port == dport)
572                     && ((dest->protocol == protocol) ||
573                         dest->vfwmark)) {
574                         /* HIT */
575                         read_unlock(&ipvs->rs_lock);
576                         return dest;
577                 }
578         }
579         read_unlock(&ipvs->rs_lock);
580
581         return NULL;
582 }
583
584 /*
585  *      Lookup destination by {addr,port} in the given service
586  */
587 static struct ip_vs_dest *
588 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
589                   __be16 dport)
590 {
591         struct ip_vs_dest *dest;
592
593         /*
594          * Find the destination for the given service
595          */
596         list_for_each_entry(dest, &svc->destinations, n_list) {
597                 if ((dest->af == svc->af)
598                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
599                     && (dest->port == dport)) {
600                         /* HIT */
601                         return dest;
602                 }
603         }
604
605         return NULL;
606 }
607
608 /*
609  * Find destination by {daddr,dport,vaddr,protocol}
610  * Cretaed to be used in ip_vs_process_message() in
611  * the backup synchronization daemon. It finds the
612  * destination to be bound to the received connection
613  * on the backup.
614  *
615  * ip_vs_lookup_real_service() looked promissing, but
616  * seems not working as expected.
617  */
618 struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,
619                                    const union nf_inet_addr *daddr,
620                                    __be16 dport,
621                                    const union nf_inet_addr *vaddr,
622                                    __be16 vport, __u16 protocol, __u32 fwmark,
623                                    __u32 flags)
624 {
625         struct ip_vs_dest *dest;
626         struct ip_vs_service *svc;
627         __be16 port = dport;
628
629         svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
630         if (!svc)
631                 return NULL;
632         if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
633                 port = 0;
634         dest = ip_vs_lookup_dest(svc, daddr, port);
635         if (!dest)
636                 dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
637         if (dest)
638                 atomic_inc(&dest->refcnt);
639         ip_vs_service_put(svc);
640         return dest;
641 }
642
643 /*
644  *  Lookup dest by {svc,addr,port} in the destination trash.
645  *  The destination trash is used to hold the destinations that are removed
646  *  from the service table but are still referenced by some conn entries.
647  *  The reason to add the destination trash is when the dest is temporary
648  *  down (either by administrator or by monitor program), the dest can be
649  *  picked back from the trash, the remaining connections to the dest can
650  *  continue, and the counting information of the dest is also useful for
651  *  scheduling.
652  */
653 static struct ip_vs_dest *
654 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
655                      __be16 dport)
656 {
657         struct ip_vs_dest *dest, *nxt;
658         struct netns_ipvs *ipvs = net_ipvs(svc->net);
659
660         /*
661          * Find the destination in trash
662          */
663         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
664                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
665                               "dest->refcnt=%d\n",
666                               dest->vfwmark,
667                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
668                               ntohs(dest->port),
669                               atomic_read(&dest->refcnt));
670                 if (dest->af == svc->af &&
671                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
672                     dest->port == dport &&
673                     dest->vfwmark == svc->fwmark &&
674                     dest->protocol == svc->protocol &&
675                     (svc->fwmark ||
676                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
677                       dest->vport == svc->port))) {
678                         /* HIT */
679                         return dest;
680                 }
681
682                 /*
683                  * Try to purge the destination from trash if not referenced
684                  */
685                 if (atomic_read(&dest->refcnt) == 1) {
686                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
687                                       "from trash\n",
688                                       dest->vfwmark,
689                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
690                                       ntohs(dest->port));
691                         list_del(&dest->n_list);
692                         ip_vs_dst_reset(dest);
693                         __ip_vs_unbind_svc(dest);
694                         free_percpu(dest->stats.cpustats);
695                         kfree(dest);
696                 }
697         }
698
699         return NULL;
700 }
701
702
703 /*
704  *  Clean up all the destinations in the trash
705  *  Called by the ip_vs_control_cleanup()
706  *
707  *  When the ip_vs_control_clearup is activated by ipvs module exit,
708  *  the service tables must have been flushed and all the connections
709  *  are expired, and the refcnt of each destination in the trash must
710  *  be 1, so we simply release them here.
711  */
712 static void ip_vs_trash_cleanup(struct net *net)
713 {
714         struct ip_vs_dest *dest, *nxt;
715         struct netns_ipvs *ipvs = net_ipvs(net);
716
717         list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
718                 list_del(&dest->n_list);
719                 ip_vs_dst_reset(dest);
720                 __ip_vs_unbind_svc(dest);
721                 free_percpu(dest->stats.cpustats);
722                 kfree(dest);
723         }
724 }
725
726 static void
727 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
728 {
729 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
730
731         spin_lock_bh(&src->lock);
732
733         IP_VS_SHOW_STATS_COUNTER(conns);
734         IP_VS_SHOW_STATS_COUNTER(inpkts);
735         IP_VS_SHOW_STATS_COUNTER(outpkts);
736         IP_VS_SHOW_STATS_COUNTER(inbytes);
737         IP_VS_SHOW_STATS_COUNTER(outbytes);
738
739         ip_vs_read_estimator(dst, src);
740
741         spin_unlock_bh(&src->lock);
742 }
743
744 static void
745 ip_vs_zero_stats(struct ip_vs_stats *stats)
746 {
747         spin_lock_bh(&stats->lock);
748
749         /* get current counters as zero point, rates are zeroed */
750
751 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
752
753         IP_VS_ZERO_STATS_COUNTER(conns);
754         IP_VS_ZERO_STATS_COUNTER(inpkts);
755         IP_VS_ZERO_STATS_COUNTER(outpkts);
756         IP_VS_ZERO_STATS_COUNTER(inbytes);
757         IP_VS_ZERO_STATS_COUNTER(outbytes);
758
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769                     struct ip_vs_dest_user_kern *udest, int add)
770 {
771         struct netns_ipvs *ipvs = net_ipvs(svc->net);
772         int conn_flags;
773
774         /* set the weight and the flags */
775         atomic_set(&dest->weight, udest->weight);
776         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
777         conn_flags |= IP_VS_CONN_F_INACTIVE;
778
779         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
780         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
781                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
782         } else {
783                 /*
784                  *    Put the real service in rs_table if not present.
785                  *    For now only for NAT!
786                  */
787                 write_lock_bh(&ipvs->rs_lock);
788                 ip_vs_rs_hash(ipvs, dest);
789                 write_unlock_bh(&ipvs->rs_lock);
790         }
791         atomic_set(&dest->conn_flags, conn_flags);
792
793         /* bind the service */
794         if (!dest->svc) {
795                 __ip_vs_bind_svc(dest, svc);
796         } else {
797                 if (dest->svc != svc) {
798                         __ip_vs_unbind_svc(dest);
799                         ip_vs_zero_stats(&dest->stats);
800                         __ip_vs_bind_svc(dest, svc);
801                 }
802         }
803
804         /* set the dest status flags */
805         dest->flags |= IP_VS_DEST_F_AVAILABLE;
806
807         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
808                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
809         dest->u_threshold = udest->u_threshold;
810         dest->l_threshold = udest->l_threshold;
811
812         spin_lock_bh(&dest->dst_lock);
813         ip_vs_dst_reset(dest);
814         spin_unlock_bh(&dest->dst_lock);
815
816         if (add)
817                 ip_vs_start_estimator(svc->net, &dest->stats);
818
819         write_lock_bh(&__ip_vs_svc_lock);
820
821         /* Wait until all other svc users go away */
822         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
823
824         if (add) {
825                 list_add(&dest->n_list, &svc->destinations);
826                 svc->num_dests++;
827         }
828
829         /* call the update_service, because server weight may be changed */
830         if (svc->scheduler->update_service)
831                 svc->scheduler->update_service(svc);
832
833         write_unlock_bh(&__ip_vs_svc_lock);
834 }
835
836
837 /*
838  *      Create a destination for the given service
839  */
840 static int
841 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
842                struct ip_vs_dest **dest_p)
843 {
844         struct ip_vs_dest *dest;
845         unsigned int atype;
846
847         EnterFunction(2);
848
849 #ifdef CONFIG_IP_VS_IPV6
850         if (svc->af == AF_INET6) {
851                 atype = ipv6_addr_type(&udest->addr.in6);
852                 if ((!(atype & IPV6_ADDR_UNICAST) ||
853                         atype & IPV6_ADDR_LINKLOCAL) &&
854                         !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
855                         return -EINVAL;
856         } else
857 #endif
858         {
859                 atype = inet_addr_type(svc->net, udest->addr.ip);
860                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
861                         return -EINVAL;
862         }
863
864         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
865         if (dest == NULL)
866                 return -ENOMEM;
867
868         dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
869         if (!dest->stats.cpustats)
870                 goto err_alloc;
871
872         dest->af = svc->af;
873         dest->protocol = svc->protocol;
874         dest->vaddr = svc->addr;
875         dest->vport = svc->port;
876         dest->vfwmark = svc->fwmark;
877         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
878         dest->port = udest->port;
879
880         atomic_set(&dest->activeconns, 0);
881         atomic_set(&dest->inactconns, 0);
882         atomic_set(&dest->persistconns, 0);
883         atomic_set(&dest->refcnt, 1);
884
885         INIT_LIST_HEAD(&dest->d_list);
886         spin_lock_init(&dest->dst_lock);
887         spin_lock_init(&dest->stats.lock);
888         __ip_vs_update_dest(svc, dest, udest, 1);
889
890         *dest_p = dest;
891
892         LeaveFunction(2);
893         return 0;
894
895 err_alloc:
896         kfree(dest);
897         return -ENOMEM;
898 }
899
900
901 /*
902  *      Add a destination into an existing service
903  */
904 static int
905 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
906 {
907         struct ip_vs_dest *dest;
908         union nf_inet_addr daddr;
909         __be16 dport = udest->port;
910         int ret;
911
912         EnterFunction(2);
913
914         if (udest->weight < 0) {
915                 pr_err("%s(): server weight less than zero\n", __func__);
916                 return -ERANGE;
917         }
918
919         if (udest->l_threshold > udest->u_threshold) {
920                 pr_err("%s(): lower threshold is higher than upper threshold\n",
921                         __func__);
922                 return -ERANGE;
923         }
924
925         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
926
927         /*
928          * Check if the dest already exists in the list
929          */
930         dest = ip_vs_lookup_dest(svc, &daddr, dport);
931
932         if (dest != NULL) {
933                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
934                 return -EEXIST;
935         }
936
937         /*
938          * Check if the dest already exists in the trash and
939          * is from the same service
940          */
941         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
942
943         if (dest != NULL) {
944                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
945                               "dest->refcnt=%d, service %u/%s:%u\n",
946                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
947                               atomic_read(&dest->refcnt),
948                               dest->vfwmark,
949                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
950                               ntohs(dest->vport));
951
952                 /*
953                  * Get the destination from the trash
954                  */
955                 list_del(&dest->n_list);
956
957                 __ip_vs_update_dest(svc, dest, udest, 1);
958                 ret = 0;
959         } else {
960                 /*
961                  * Allocate and initialize the dest structure
962                  */
963                 ret = ip_vs_new_dest(svc, udest, &dest);
964         }
965         LeaveFunction(2);
966
967         return ret;
968 }
969
970
971 /*
972  *      Edit a destination in the given service
973  */
974 static int
975 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
976 {
977         struct ip_vs_dest *dest;
978         union nf_inet_addr daddr;
979         __be16 dport = udest->port;
980
981         EnterFunction(2);
982
983         if (udest->weight < 0) {
984                 pr_err("%s(): server weight less than zero\n", __func__);
985                 return -ERANGE;
986         }
987
988         if (udest->l_threshold > udest->u_threshold) {
989                 pr_err("%s(): lower threshold is higher than upper threshold\n",
990                         __func__);
991                 return -ERANGE;
992         }
993
994         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
995
996         /*
997          *  Lookup the destination list
998          */
999         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1000
1001         if (dest == NULL) {
1002                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1003                 return -ENOENT;
1004         }
1005
1006         __ip_vs_update_dest(svc, dest, udest, 0);
1007         LeaveFunction(2);
1008
1009         return 0;
1010 }
1011
1012
1013 /*
1014  *      Delete a destination (must be already unlinked from the service)
1015  */
1016 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1017 {
1018         struct netns_ipvs *ipvs = net_ipvs(net);
1019
1020         ip_vs_stop_estimator(net, &dest->stats);
1021
1022         /*
1023          *  Remove it from the d-linked list with the real services.
1024          */
1025         write_lock_bh(&ipvs->rs_lock);
1026         ip_vs_rs_unhash(dest);
1027         write_unlock_bh(&ipvs->rs_lock);
1028
1029         /*
1030          *  Decrease the refcnt of the dest, and free the dest
1031          *  if nobody refers to it (refcnt=0). Otherwise, throw
1032          *  the destination into the trash.
1033          */
1034         if (atomic_dec_and_test(&dest->refcnt)) {
1035                 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1036                               dest->vfwmark,
1037                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1038                               ntohs(dest->port));
1039                 ip_vs_dst_reset(dest);
1040                 /* simply decrease svc->refcnt here, let the caller check
1041                    and release the service if nobody refers to it.
1042                    Only user context can release destination and service,
1043                    and only one user context can update virtual service at a
1044                    time, so the operation here is OK */
1045                 atomic_dec(&dest->svc->refcnt);
1046                 free_percpu(dest->stats.cpustats);
1047                 kfree(dest);
1048         } else {
1049                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1050                               "dest->refcnt=%d\n",
1051                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1052                               ntohs(dest->port),
1053                               atomic_read(&dest->refcnt));
1054                 list_add(&dest->n_list, &ipvs->dest_trash);
1055                 atomic_inc(&dest->refcnt);
1056         }
1057 }
1058
1059
1060 /*
1061  *      Unlink a destination from the given service
1062  */
1063 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1064                                 struct ip_vs_dest *dest,
1065                                 int svcupd)
1066 {
1067         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1068
1069         /*
1070          *  Remove it from the d-linked destination list.
1071          */
1072         list_del(&dest->n_list);
1073         svc->num_dests--;
1074
1075         /*
1076          *  Call the update_service function of its scheduler
1077          */
1078         if (svcupd && svc->scheduler->update_service)
1079                         svc->scheduler->update_service(svc);
1080 }
1081
1082
1083 /*
1084  *      Delete a destination server in the given service
1085  */
1086 static int
1087 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1088 {
1089         struct ip_vs_dest *dest;
1090         __be16 dport = udest->port;
1091
1092         EnterFunction(2);
1093
1094         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1095
1096         if (dest == NULL) {
1097                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1098                 return -ENOENT;
1099         }
1100
1101         write_lock_bh(&__ip_vs_svc_lock);
1102
1103         /*
1104          *      Wait until all other svc users go away.
1105          */
1106         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1107
1108         /*
1109          *      Unlink dest from the service
1110          */
1111         __ip_vs_unlink_dest(svc, dest, 1);
1112
1113         write_unlock_bh(&__ip_vs_svc_lock);
1114
1115         /*
1116          *      Delete the destination
1117          */
1118         __ip_vs_del_dest(svc->net, dest);
1119
1120         LeaveFunction(2);
1121
1122         return 0;
1123 }
1124
1125
1126 /*
1127  *      Add a service into the service hash table
1128  */
1129 static int
1130 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1131                   struct ip_vs_service **svc_p)
1132 {
1133         int ret = 0;
1134         struct ip_vs_scheduler *sched = NULL;
1135         struct ip_vs_pe *pe = NULL;
1136         struct ip_vs_service *svc = NULL;
1137         struct netns_ipvs *ipvs = net_ipvs(net);
1138
1139         /* increase the module use count */
1140         ip_vs_use_count_inc();
1141
1142         /* Lookup the scheduler by 'u->sched_name' */
1143         sched = ip_vs_scheduler_get(u->sched_name);
1144         if (sched == NULL) {
1145                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1146                 ret = -ENOENT;
1147                 goto out_err;
1148         }
1149
1150         if (u->pe_name && *u->pe_name) {
1151                 pe = ip_vs_pe_getbyname(u->pe_name);
1152                 if (pe == NULL) {
1153                         pr_info("persistence engine module ip_vs_pe_%s "
1154                                 "not found\n", u->pe_name);
1155                         ret = -ENOENT;
1156                         goto out_err;
1157                 }
1158         }
1159
1160 #ifdef CONFIG_IP_VS_IPV6
1161         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1162                 ret = -EINVAL;
1163                 goto out_err;
1164         }
1165 #endif
1166
1167         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1168         if (svc == NULL) {
1169                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1170                 ret = -ENOMEM;
1171                 goto out_err;
1172         }
1173         svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1174         if (!svc->stats.cpustats)
1175                 goto out_err;
1176
1177         /* I'm the first user of the service */
1178         atomic_set(&svc->usecnt, 0);
1179         atomic_set(&svc->refcnt, 0);
1180
1181         svc->af = u->af;
1182         svc->protocol = u->protocol;
1183         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1184         svc->port = u->port;
1185         svc->fwmark = u->fwmark;
1186         svc->flags = u->flags;
1187         svc->timeout = u->timeout * HZ;
1188         svc->netmask = u->netmask;
1189         svc->net = net;
1190
1191         INIT_LIST_HEAD(&svc->destinations);
1192         rwlock_init(&svc->sched_lock);
1193         spin_lock_init(&svc->stats.lock);
1194
1195         /* Bind the scheduler */
1196         ret = ip_vs_bind_scheduler(svc, sched);
1197         if (ret)
1198                 goto out_err;
1199         sched = NULL;
1200
1201         /* Bind the ct retriever */
1202         ip_vs_bind_pe(svc, pe);
1203         pe = NULL;
1204
1205         /* Update the virtual service counters */
1206         if (svc->port == FTPPORT)
1207                 atomic_inc(&ipvs->ftpsvc_counter);
1208         else if (svc->port == 0)
1209                 atomic_inc(&ipvs->nullsvc_counter);
1210
1211         ip_vs_start_estimator(net, &svc->stats);
1212
1213         /* Count only IPv4 services for old get/setsockopt interface */
1214         if (svc->af == AF_INET)
1215                 ipvs->num_services++;
1216
1217         /* Hash the service into the service table */
1218         write_lock_bh(&__ip_vs_svc_lock);
1219         ip_vs_svc_hash(svc);
1220         write_unlock_bh(&__ip_vs_svc_lock);
1221
1222         *svc_p = svc;
1223         /* Now there is a service - full throttle */
1224         ipvs->enable = 1;
1225         return 0;
1226
1227
1228  out_err:
1229         if (svc != NULL) {
1230                 ip_vs_unbind_scheduler(svc);
1231                 if (svc->inc) {
1232                         local_bh_disable();
1233                         ip_vs_app_inc_put(svc->inc);
1234                         local_bh_enable();
1235                 }
1236                 if (svc->stats.cpustats)
1237                         free_percpu(svc->stats.cpustats);
1238                 kfree(svc);
1239         }
1240         ip_vs_scheduler_put(sched);
1241         ip_vs_pe_put(pe);
1242
1243         /* decrease the module use count */
1244         ip_vs_use_count_dec();
1245
1246         return ret;
1247 }
1248
1249
1250 /*
1251  *      Edit a service and bind it with a new scheduler
1252  */
1253 static int
1254 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1255 {
1256         struct ip_vs_scheduler *sched, *old_sched;
1257         struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1258         int ret = 0;
1259
1260         /*
1261          * Lookup the scheduler, by 'u->sched_name'
1262          */
1263         sched = ip_vs_scheduler_get(u->sched_name);
1264         if (sched == NULL) {
1265                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1266                 return -ENOENT;
1267         }
1268         old_sched = sched;
1269
1270         if (u->pe_name && *u->pe_name) {
1271                 pe = ip_vs_pe_getbyname(u->pe_name);
1272                 if (pe == NULL) {
1273                         pr_info("persistence engine module ip_vs_pe_%s "
1274                                 "not found\n", u->pe_name);
1275                         ret = -ENOENT;
1276                         goto out;
1277                 }
1278                 old_pe = pe;
1279         }
1280
1281 #ifdef CONFIG_IP_VS_IPV6
1282         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1283                 ret = -EINVAL;
1284                 goto out;
1285         }
1286 #endif
1287
1288         write_lock_bh(&__ip_vs_svc_lock);
1289
1290         /*
1291          * Wait until all other svc users go away.
1292          */
1293         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1294
1295         /*
1296          * Set the flags and timeout value
1297          */
1298         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1299         svc->timeout = u->timeout * HZ;
1300         svc->netmask = u->netmask;
1301
1302         old_sched = svc->scheduler;
1303         if (sched != old_sched) {
1304                 /*
1305                  * Unbind the old scheduler
1306                  */
1307                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1308                         old_sched = sched;
1309                         goto out_unlock;
1310                 }
1311
1312                 /*
1313                  * Bind the new scheduler
1314                  */
1315                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1316                         /*
1317                          * If ip_vs_bind_scheduler fails, restore the old
1318                          * scheduler.
1319                          * The main reason of failure is out of memory.
1320                          *
1321                          * The question is if the old scheduler can be
1322                          * restored all the time. TODO: if it cannot be
1323                          * restored some time, we must delete the service,
1324                          * otherwise the system may crash.
1325                          */
1326                         ip_vs_bind_scheduler(svc, old_sched);
1327                         old_sched = sched;
1328                         goto out_unlock;
1329                 }
1330         }
1331
1332         old_pe = svc->pe;
1333         if (pe != old_pe) {
1334                 ip_vs_unbind_pe(svc);
1335                 ip_vs_bind_pe(svc, pe);
1336         }
1337
1338 out_unlock:
1339         write_unlock_bh(&__ip_vs_svc_lock);
1340 out:
1341         ip_vs_scheduler_put(old_sched);
1342         ip_vs_pe_put(old_pe);
1343         return ret;
1344 }
1345
1346
1347 /*
1348  *      Delete a service from the service list
1349  *      - The service must be unlinked, unlocked and not referenced!
1350  *      - We are called under _bh lock
1351  */
1352 static void __ip_vs_del_service(struct ip_vs_service *svc)
1353 {
1354         struct ip_vs_dest *dest, *nxt;
1355         struct ip_vs_scheduler *old_sched;
1356         struct ip_vs_pe *old_pe;
1357         struct netns_ipvs *ipvs = net_ipvs(svc->net);
1358
1359         pr_info("%s: enter\n", __func__);
1360
1361         /* Count only IPv4 services for old get/setsockopt interface */
1362         if (svc->af == AF_INET)
1363                 ipvs->num_services--;
1364
1365         ip_vs_stop_estimator(svc->net, &svc->stats);
1366
1367         /* Unbind scheduler */
1368         old_sched = svc->scheduler;
1369         ip_vs_unbind_scheduler(svc);
1370         ip_vs_scheduler_put(old_sched);
1371
1372         /* Unbind persistence engine */
1373         old_pe = svc->pe;
1374         ip_vs_unbind_pe(svc);
1375         ip_vs_pe_put(old_pe);
1376
1377         /* Unbind app inc */
1378         if (svc->inc) {
1379                 ip_vs_app_inc_put(svc->inc);
1380                 svc->inc = NULL;
1381         }
1382
1383         /*
1384          *    Unlink the whole destination list
1385          */
1386         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1387                 __ip_vs_unlink_dest(svc, dest, 0);
1388                 __ip_vs_del_dest(svc->net, dest);
1389         }
1390
1391         /*
1392          *    Update the virtual service counters
1393          */
1394         if (svc->port == FTPPORT)
1395                 atomic_dec(&ipvs->ftpsvc_counter);
1396         else if (svc->port == 0)
1397                 atomic_dec(&ipvs->nullsvc_counter);
1398
1399         /*
1400          *    Free the service if nobody refers to it
1401          */
1402         if (atomic_read(&svc->refcnt) == 0) {
1403                 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1404                               svc->fwmark,
1405                               IP_VS_DBG_ADDR(svc->af, &svc->addr),
1406                               ntohs(svc->port), atomic_read(&svc->usecnt));
1407                 free_percpu(svc->stats.cpustats);
1408                 kfree(svc);
1409         }
1410
1411         /* decrease the module use count */
1412         ip_vs_use_count_dec();
1413 }
1414
1415 /*
1416  * Unlink a service from list and try to delete it if its refcnt reached 0
1417  */
1418 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1419 {
1420         /*
1421          * Unhash it from the service table
1422          */
1423         write_lock_bh(&__ip_vs_svc_lock);
1424
1425         ip_vs_svc_unhash(svc);
1426
1427         /*
1428          * Wait until all the svc users go away.
1429          */
1430         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1431
1432         __ip_vs_del_service(svc);
1433
1434         write_unlock_bh(&__ip_vs_svc_lock);
1435 }
1436
1437 /*
1438  *      Delete a service from the service list
1439  */
1440 static int ip_vs_del_service(struct ip_vs_service *svc)
1441 {
1442         if (svc == NULL)
1443                 return -EEXIST;
1444         ip_vs_unlink_service(svc);
1445
1446         return 0;
1447 }
1448
1449
1450 /*
1451  *      Flush all the virtual services
1452  */
1453 static int ip_vs_flush(struct net *net)
1454 {
1455         int idx;
1456         struct ip_vs_service *svc, *nxt;
1457
1458         /*
1459          * Flush the service table hashed by <netns,protocol,addr,port>
1460          */
1461         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1462                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1463                                          s_list) {
1464                         if (net_eq(svc->net, net))
1465                                 ip_vs_unlink_service(svc);
1466                 }
1467         }
1468
1469         /*
1470          * Flush the service table hashed by fwmark
1471          */
1472         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1473                 list_for_each_entry_safe(svc, nxt,
1474                                          &ip_vs_svc_fwm_table[idx], f_list) {
1475                         if (net_eq(svc->net, net))
1476                                 ip_vs_unlink_service(svc);
1477                 }
1478         }
1479
1480         return 0;
1481 }
1482
1483 /*
1484  *      Delete service by {netns} in the service table.
1485  *      Called by __ip_vs_cleanup()
1486  */
1487 void ip_vs_service_net_cleanup(struct net *net)
1488 {
1489         EnterFunction(2);
1490         /* Check for "full" addressed entries */
1491         mutex_lock(&__ip_vs_mutex);
1492         ip_vs_flush(net);
1493         mutex_unlock(&__ip_vs_mutex);
1494         LeaveFunction(2);
1495 }
1496 /*
1497  * Release dst hold by dst_cache
1498  */
1499 static inline void
1500 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1501 {
1502         spin_lock_bh(&dest->dst_lock);
1503         if (dest->dst_cache && dest->dst_cache->dev == dev) {
1504                 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1505                               dev->name,
1506                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1507                               ntohs(dest->port),
1508                               atomic_read(&dest->refcnt));
1509                 ip_vs_dst_reset(dest);
1510         }
1511         spin_unlock_bh(&dest->dst_lock);
1512
1513 }
1514 /*
1515  * Netdev event receiver
1516  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1517  * a device that is "unregister" it must be released.
1518  */
1519 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1520                             void *ptr)
1521 {
1522         struct net_device *dev = ptr;
1523         struct net *net = dev_net(dev);
1524         struct ip_vs_service *svc;
1525         struct ip_vs_dest *dest;
1526         unsigned int idx;
1527
1528         if (event != NETDEV_UNREGISTER)
1529                 return NOTIFY_DONE;
1530         IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1531         EnterFunction(2);
1532         mutex_lock(&__ip_vs_mutex);
1533         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1534                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1535                         if (net_eq(svc->net, net)) {
1536                                 list_for_each_entry(dest, &svc->destinations,
1537                                                     n_list) {
1538                                         __ip_vs_dev_reset(dest, dev);
1539                                 }
1540                         }
1541                 }
1542
1543                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1544                         if (net_eq(svc->net, net)) {
1545                                 list_for_each_entry(dest, &svc->destinations,
1546                                                     n_list) {
1547                                         __ip_vs_dev_reset(dest, dev);
1548                                 }
1549                         }
1550
1551                 }
1552         }
1553
1554         list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) {
1555                 __ip_vs_dev_reset(dest, dev);
1556         }
1557         mutex_unlock(&__ip_vs_mutex);
1558         LeaveFunction(2);
1559         return NOTIFY_DONE;
1560 }
1561
1562 /*
1563  *      Zero counters in a service or all services
1564  */
1565 static int ip_vs_zero_service(struct ip_vs_service *svc)
1566 {
1567         struct ip_vs_dest *dest;
1568
1569         write_lock_bh(&__ip_vs_svc_lock);
1570         list_for_each_entry(dest, &svc->destinations, n_list) {
1571                 ip_vs_zero_stats(&dest->stats);
1572         }
1573         ip_vs_zero_stats(&svc->stats);
1574         write_unlock_bh(&__ip_vs_svc_lock);
1575         return 0;
1576 }
1577
1578 static int ip_vs_zero_all(struct net *net)
1579 {
1580         int idx;
1581         struct ip_vs_service *svc;
1582
1583         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1584                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1585                         if (net_eq(svc->net, net))
1586                                 ip_vs_zero_service(svc);
1587                 }
1588         }
1589
1590         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1591                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1592                         if (net_eq(svc->net, net))
1593                                 ip_vs_zero_service(svc);
1594                 }
1595         }
1596
1597         ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1598         return 0;
1599 }
1600
1601 #ifdef CONFIG_SYSCTL
1602
/* Bounds referenced through .extra1/.extra2 by the "sync_retries" entry */
static int zero;
static int three = 3;
1605
1606 static int
1607 proc_do_defense_mode(ctl_table *table, int write,
1608                      void __user *buffer, size_t *lenp, loff_t *ppos)
1609 {
1610         struct net *net = current->nsproxy->net_ns;
1611         int *valp = table->data;
1612         int val = *valp;
1613         int rc;
1614
1615         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1616         if (write && (*valp != val)) {
1617                 if ((*valp < 0) || (*valp > 3)) {
1618                         /* Restore the correct value */
1619                         *valp = val;
1620                 } else {
1621                         update_defense_level(net_ipvs(net));
1622                 }
1623         }
1624         return rc;
1625 }
1626
1627 static int
1628 proc_do_sync_threshold(ctl_table *table, int write,
1629                        void __user *buffer, size_t *lenp, loff_t *ppos)
1630 {
1631         int *valp = table->data;
1632         int val[2];
1633         int rc;
1634
1635         /* backup the value first */
1636         memcpy(val, valp, sizeof(val));
1637
1638         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1639         if (write && (valp[0] < 0 || valp[1] < 0 ||
1640             (valp[0] >= valp[1] && valp[1]))) {
1641                 /* Restore the correct value */
1642                 memcpy(valp, val, sizeof(val));
1643         }
1644         return rc;
1645 }
1646
1647 static int
1648 proc_do_sync_mode(ctl_table *table, int write,
1649                      void __user *buffer, size_t *lenp, loff_t *ppos)
1650 {
1651         int *valp = table->data;
1652         int val = *valp;
1653         int rc;
1654
1655         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1656         if (write && (*valp != val)) {
1657                 if ((*valp < 0) || (*valp > 1)) {
1658                         /* Restore the correct value */
1659                         *valp = val;
1660                 }
1661         }
1662         return rc;
1663 }
1664
1665 static int
1666 proc_do_sync_ports(ctl_table *table, int write,
1667                    void __user *buffer, size_t *lenp, loff_t *ppos)
1668 {
1669         int *valp = table->data;
1670         int val = *valp;
1671         int rc;
1672
1673         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1674         if (write && (*valp != val)) {
1675                 if (*valp < 1 || !is_power_of_2(*valp)) {
1676                         /* Restore the correct value */
1677                         *valp = val;
1678                 }
1679         }
1680         return rc;
1681 }
1682
1683 /*
1684  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1685  *      Do not change order or insert new entries without
1686  *      align with netns init in ip_vs_control_net_init()
1687  */
1688
1689 static struct ctl_table vs_vars[] = {
1690         {
1691                 .procname       = "amemthresh",
1692                 .maxlen         = sizeof(int),
1693                 .mode           = 0644,
1694                 .proc_handler   = proc_dointvec,
1695         },
1696         {
1697                 .procname       = "am_droprate",
1698                 .maxlen         = sizeof(int),
1699                 .mode           = 0644,
1700                 .proc_handler   = proc_dointvec,
1701         },
1702         {
1703                 .procname       = "drop_entry",
1704                 .maxlen         = sizeof(int),
1705                 .mode           = 0644,
1706                 .proc_handler   = proc_do_defense_mode,
1707         },
1708         {
1709                 .procname       = "drop_packet",
1710                 .maxlen         = sizeof(int),
1711                 .mode           = 0644,
1712                 .proc_handler   = proc_do_defense_mode,
1713         },
1714 #ifdef CONFIG_IP_VS_NFCT
1715         {
1716                 .procname       = "conntrack",
1717                 .maxlen         = sizeof(int),
1718                 .mode           = 0644,
1719                 .proc_handler   = &proc_dointvec,
1720         },
1721 #endif
1722         {
1723                 .procname       = "secure_tcp",
1724                 .maxlen         = sizeof(int),
1725                 .mode           = 0644,
1726                 .proc_handler   = proc_do_defense_mode,
1727         },
1728         {
1729                 .procname       = "snat_reroute",
1730                 .maxlen         = sizeof(int),
1731                 .mode           = 0644,
1732                 .proc_handler   = &proc_dointvec,
1733         },
1734         {
1735                 .procname       = "sync_version",
1736                 .maxlen         = sizeof(int),
1737                 .mode           = 0644,
1738                 .proc_handler   = &proc_do_sync_mode,
1739         },
1740         {
1741                 .procname       = "sync_ports",
1742                 .maxlen         = sizeof(int),
1743                 .mode           = 0644,
1744                 .proc_handler   = &proc_do_sync_ports,
1745         },
1746         {
1747                 .procname       = "sync_qlen_max",
1748                 .maxlen         = sizeof(int),
1749                 .mode           = 0644,
1750                 .proc_handler   = proc_dointvec,
1751         },
1752         {
1753                 .procname       = "sync_sock_size",
1754                 .maxlen         = sizeof(int),
1755                 .mode           = 0644,
1756                 .proc_handler   = proc_dointvec,
1757         },
1758         {
1759                 .procname       = "cache_bypass",
1760                 .maxlen         = sizeof(int),
1761                 .mode           = 0644,
1762                 .proc_handler   = proc_dointvec,
1763         },
1764         {
1765                 .procname       = "expire_nodest_conn",
1766                 .maxlen         = sizeof(int),
1767                 .mode           = 0644,
1768                 .proc_handler   = proc_dointvec,
1769         },
1770         {
1771                 .procname       = "expire_quiescent_template",
1772                 .maxlen         = sizeof(int),
1773                 .mode           = 0644,
1774                 .proc_handler   = proc_dointvec,
1775         },
1776         {
1777                 .procname       = "sync_threshold",
1778                 .maxlen         =
1779                         sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1780                 .mode           = 0644,
1781                 .proc_handler   = proc_do_sync_threshold,
1782         },
1783         {
1784                 .procname       = "sync_refresh_period",
1785                 .maxlen         = sizeof(int),
1786                 .mode           = 0644,
1787                 .proc_handler   = proc_dointvec_jiffies,
1788         },
1789         {
1790                 .procname       = "sync_retries",
1791                 .maxlen         = sizeof(int),
1792                 .mode           = 0644,
1793                 .proc_handler   = proc_dointvec_minmax,
1794                 .extra1         = &zero,
1795                 .extra2         = &three,
1796         },
1797         {
1798                 .procname       = "nat_icmp_send",
1799                 .maxlen         = sizeof(int),
1800                 .mode           = 0644,
1801                 .proc_handler   = proc_dointvec,
1802         },
1803 #ifdef CONFIG_IP_VS_DEBUG
1804         {
1805                 .procname       = "debug_level",
1806                 .data           = &sysctl_ip_vs_debug_level,
1807                 .maxlen         = sizeof(int),
1808                 .mode           = 0644,
1809                 .proc_handler   = proc_dointvec,
1810         },
1811 #endif
1812 #if 0
1813         {
1814                 .procname       = "timeout_established",
1815                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1816                 .maxlen         = sizeof(int),
1817                 .mode           = 0644,
1818                 .proc_handler   = proc_dointvec_jiffies,
1819         },
1820         {
1821                 .procname       = "timeout_synsent",
1822                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1823                 .maxlen         = sizeof(int),
1824                 .mode           = 0644,
1825                 .proc_handler   = proc_dointvec_jiffies,
1826         },
1827         {
1828                 .procname       = "timeout_synrecv",
1829                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1830                 .maxlen         = sizeof(int),
1831                 .mode           = 0644,
1832                 .proc_handler   = proc_dointvec_jiffies,
1833         },
1834         {
1835                 .procname       = "timeout_finwait",
1836                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1837                 .maxlen         = sizeof(int),
1838                 .mode           = 0644,
1839                 .proc_handler   = proc_dointvec_jiffies,
1840         },
1841         {
1842                 .procname       = "timeout_timewait",
1843                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1844                 .maxlen         = sizeof(int),
1845                 .mode           = 0644,
1846                 .proc_handler   = proc_dointvec_jiffies,
1847         },
1848         {
1849                 .procname       = "timeout_close",
1850                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1851                 .maxlen         = sizeof(int),
1852                 .mode           = 0644,
1853                 .proc_handler   = proc_dointvec_jiffies,
1854         },
1855         {
1856                 .procname       = "timeout_closewait",
1857                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1858                 .maxlen         = sizeof(int),
1859                 .mode           = 0644,
1860                 .proc_handler   = proc_dointvec_jiffies,
1861         },
1862         {
1863                 .procname       = "timeout_lastack",
1864                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1865                 .maxlen         = sizeof(int),
1866                 .mode           = 0644,
1867                 .proc_handler   = proc_dointvec_jiffies,
1868         },
1869         {
1870                 .procname       = "timeout_listen",
1871                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1872                 .maxlen         = sizeof(int),
1873                 .mode           = 0644,
1874                 .proc_handler   = proc_dointvec_jiffies,
1875         },
1876         {
1877                 .procname       = "timeout_synack",
1878                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1879                 .maxlen         = sizeof(int),
1880                 .mode           = 0644,
1881                 .proc_handler   = proc_dointvec_jiffies,
1882         },
1883         {
1884                 .procname       = "timeout_udp",
1885                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1886                 .maxlen         = sizeof(int),
1887                 .mode           = 0644,
1888                 .proc_handler   = proc_dointvec_jiffies,
1889         },
1890         {
1891                 .procname       = "timeout_icmp",
1892                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1893                 .maxlen         = sizeof(int),
1894                 .mode           = 0644,
1895                 .proc_handler   = proc_dointvec_jiffies,
1896         },
1897 #endif
1898         { }
1899 };
1900
1901 #endif
1902
1903 #ifdef CONFIG_PROC_FS
1904
1905 struct ip_vs_iter {
1906         struct seq_net_private p;  /* Do not move this, netns depends upon it*/
1907         struct list_head *table;
1908         int bucket;
1909 };
1910
1911 /*
1912  *      Write the contents of the VS rule table to a PROCfs file.
1913  *      (It is kept just for backward compatibility)
1914  */
1915 static inline const char *ip_vs_fwd_name(unsigned int flags)
1916 {
1917         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1918         case IP_VS_CONN_F_LOCALNODE:
1919                 return "Local";
1920         case IP_VS_CONN_F_TUNNEL:
1921                 return "Tunnel";
1922         case IP_VS_CONN_F_DROUTE:
1923                 return "Route";
1924         default:
1925                 return "Masq";
1926         }
1927 }
1928
1929
1930 /* Get the Nth entry in the two lists */
1931 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1932 {
1933         struct net *net = seq_file_net(seq);
1934         struct ip_vs_iter *iter = seq->private;
1935         int idx;
1936         struct ip_vs_service *svc;
1937
1938         /* look in hash by protocol */
1939         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1940                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1941                         if (net_eq(svc->net, net) && pos-- == 0) {
1942                                 iter->table = ip_vs_svc_table;
1943                                 iter->bucket = idx;
1944                                 return svc;
1945                         }
1946                 }
1947         }
1948
1949         /* keep looking in fwmark */
1950         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1951                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1952                         if (net_eq(svc->net, net) && pos-- == 0) {
1953                                 iter->table = ip_vs_svc_fwm_table;
1954                                 iter->bucket = idx;
1955                                 return svc;
1956                         }
1957                 }
1958         }
1959
1960         return NULL;
1961 }
1962
1963 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1964 __acquires(__ip_vs_svc_lock)
1965 {
1966
1967         read_lock_bh(&__ip_vs_svc_lock);
1968         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1969 }
1970
1971
1972 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1973 {
1974         struct list_head *e;
1975         struct ip_vs_iter *iter;
1976         struct ip_vs_service *svc;
1977
1978         ++*pos;
1979         if (v == SEQ_START_TOKEN)
1980                 return ip_vs_info_array(seq,0);
1981
1982         svc = v;
1983         iter = seq->private;
1984
1985         if (iter->table == ip_vs_svc_table) {
1986                 /* next service in table hashed by protocol */
1987                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1988                         return list_entry(e, struct ip_vs_service, s_list);
1989
1990
1991                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1992                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1993                                             s_list) {
1994                                 return svc;
1995                         }
1996                 }
1997
1998                 iter->table = ip_vs_svc_fwm_table;
1999                 iter->bucket = -1;
2000                 goto scan_fwmark;
2001         }
2002
2003         /* next service in hashed by fwmark */
2004         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2005                 return list_entry(e, struct ip_vs_service, f_list);
2006
2007  scan_fwmark:
2008         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2009                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2010                                     f_list)
2011                         return svc;
2012         }
2013
2014         return NULL;
2015 }
2016
2017 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2018 __releases(__ip_vs_svc_lock)
2019 {
2020         read_unlock_bh(&__ip_vs_svc_lock);
2021 }
2022
2023
2024 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2025 {
2026         if (v == SEQ_START_TOKEN) {
2027                 seq_printf(seq,
2028                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
2029                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2030                 seq_puts(seq,
2031                          "Prot LocalAddress:Port Scheduler Flags\n");
2032                 seq_puts(seq,
2033                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2034         } else {
2035                 const struct ip_vs_service *svc = v;
2036                 const struct ip_vs_iter *iter = seq->private;
2037                 const struct ip_vs_dest *dest;
2038
2039                 if (iter->table == ip_vs_svc_table) {
2040 #ifdef CONFIG_IP_VS_IPV6
2041                         if (svc->af == AF_INET6)
2042                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
2043                                            ip_vs_proto_name(svc->protocol),
2044                                            &svc->addr.in6,
2045                                            ntohs(svc->port),
2046                                            svc->scheduler->name);
2047                         else
2048 #endif
2049                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
2050                                            ip_vs_proto_name(svc->protocol),
2051                                            ntohl(svc->addr.ip),
2052                                            ntohs(svc->port),
2053                                            svc->scheduler->name,
2054                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2055                 } else {
2056                         seq_printf(seq, "FWM  %08X %s %s",
2057                                    svc->fwmark, svc->scheduler->name,
2058                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2059                 }
2060
2061                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2062                         seq_printf(seq, "persistent %d %08X\n",
2063                                 svc->timeout,
2064                                 ntohl(svc->netmask));
2065                 else
2066                         seq_putc(seq, '\n');
2067
2068                 list_for_each_entry(dest, &svc->destinations, n_list) {
2069 #ifdef CONFIG_IP_VS_IPV6
2070                         if (dest->af == AF_INET6)
2071                                 seq_printf(seq,
2072                                            "  -> [%pI6]:%04X"
2073                                            "      %-7s %-6d %-10d %-10d\n",
2074                                            &dest->addr.in6,
2075                                            ntohs(dest->port),
2076                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2077                                            atomic_read(&dest->weight),
2078                                            atomic_read(&dest->activeconns),
2079                                            atomic_read(&dest->inactconns));
2080                         else
2081 #endif
2082                                 seq_printf(seq,
2083                                            "  -> %08X:%04X      "
2084                                            "%-7s %-6d %-10d %-10d\n",
2085                                            ntohl(dest->addr.ip),
2086                                            ntohs(dest->port),
2087                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2088                                            atomic_read(&dest->weight),
2089                                            atomic_read(&dest->activeconns),
2090                                            atomic_read(&dest->inactconns));
2091
2092                 }
2093         }
2094         return 0;
2095 }
2096
2097 static const struct seq_operations ip_vs_info_seq_ops = {
2098         .start = ip_vs_info_seq_start,
2099         .next  = ip_vs_info_seq_next,
2100         .stop  = ip_vs_info_seq_stop,
2101         .show  = ip_vs_info_seq_show,
2102 };
2103
2104 static int ip_vs_info_open(struct inode *inode, struct file *file)
2105 {
2106         return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2107                         sizeof(struct ip_vs_iter));
2108 }
2109
2110 static const struct file_operations ip_vs_info_fops = {
2111         .owner   = THIS_MODULE,
2112         .open    = ip_vs_info_open,
2113         .read    = seq_read,
2114         .llseek  = seq_lseek,
2115         .release = seq_release_net,
2116 };
2117
2118 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2119 {
2120         struct net *net = seq_file_single_net(seq);
2121         struct ip_vs_stats_user show;
2122
2123 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2124         seq_puts(seq,
2125                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
2126         seq_printf(seq,
2127                    "   Conns  Packets  Packets            Bytes            Bytes\n");
2128
2129         ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2130         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2131                    show.inpkts, show.outpkts,
2132                    (unsigned long long) show.inbytes,
2133                    (unsigned long long) show.outbytes);
2134
2135 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2136         seq_puts(seq,
2137                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2138         seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2139                         show.cps, show.inpps, show.outpps,
2140                         show.inbps, show.outbps);
2141
2142         return 0;
2143 }
2144
2145 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2146 {
2147         return single_open_net(inode, file, ip_vs_stats_show);
2148 }
2149
2150 static const struct file_operations ip_vs_stats_fops = {
2151         .owner = THIS_MODULE,
2152         .open = ip_vs_stats_seq_open,
2153         .read = seq_read,
2154         .llseek = seq_lseek,
2155         .release = single_release_net,
2156 };
2157
2158 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2159 {
2160         struct net *net = seq_file_single_net(seq);
2161         struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2162         struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2163         struct ip_vs_stats_user rates;
2164         int i;
2165
2166 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
2167         seq_puts(seq,
2168                  "       Total Incoming Outgoing         Incoming         Outgoing\n");
2169         seq_printf(seq,
2170                    "CPU    Conns  Packets  Packets            Bytes            Bytes\n");
2171
2172         for_each_possible_cpu(i) {
2173                 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2174                 unsigned int start;
2175                 __u64 inbytes, outbytes;
2176
2177                 do {
2178                         start = u64_stats_fetch_begin_bh(&u->syncp);
2179                         inbytes = u->ustats.inbytes;
2180                         outbytes = u->ustats.outbytes;
2181                 } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2182
2183                 seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2184                            i, u->ustats.conns, u->ustats.inpkts,
2185                            u->ustats.outpkts, (__u64)inbytes,
2186                            (__u64)outbytes);
2187         }
2188
2189         spin_lock_bh(&tot_stats->lock);
2190
2191         seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
2192                    tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2193                    tot_stats->ustats.outpkts,
2194                    (unsigned long long) tot_stats->ustats.inbytes,
2195                    (unsigned long long) tot_stats->ustats.outbytes);
2196
2197         ip_vs_read_estimator(&rates, tot_stats);
2198
2199         spin_unlock_bh(&tot_stats->lock);
2200
2201 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2202         seq_puts(seq,
2203                    "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
2204         seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
2205                         rates.cps,
2206                         rates.inpps,
2207                         rates.outpps,
2208                         rates.inbps,
2209                         rates.outbps);
2210
2211         return 0;
2212 }
2213
2214 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2215 {
2216         return single_open_net(inode, file, ip_vs_stats_percpu_show);
2217 }
2218
2219 static const struct file_operations ip_vs_stats_percpu_fops = {
2220         .owner = THIS_MODULE,
2221         .open = ip_vs_stats_percpu_seq_open,
2222         .read = seq_read,
2223         .llseek = seq_lseek,
2224         .release = single_release_net,
2225 };
2226 #endif
2227
2228 /*
2229  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2230  */
2231 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2232 {
2233 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2234         struct ip_vs_proto_data *pd;
2235 #endif
2236
2237         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2238                   u->tcp_timeout,
2239                   u->tcp_fin_timeout,
2240                   u->udp_timeout);
2241
2242 #ifdef CONFIG_IP_VS_PROTO_TCP
2243         if (u->tcp_timeout) {
2244                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2245                 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
2246                         = u->tcp_timeout * HZ;
2247         }
2248
2249         if (u->tcp_fin_timeout) {
2250                 pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2251                 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
2252                         = u->tcp_fin_timeout * HZ;
2253         }
2254 #endif
2255
2256 #ifdef CONFIG_IP_VS_PROTO_UDP
2257         if (u->udp_timeout) {
2258                 pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2259                 pd->timeout_table[IP_VS_UDP_S_NORMAL]
2260                         = u->udp_timeout * HZ;
2261         }
2262 #endif
2263         return 0;
2264 }
2265
2266
/*
 * Helpers for the IP_VS_SO_SET_* sockopt commands: SET_CMDID() turns a
 * command number into an index relative to IP_VS_BASE_CTL, the *_ARG_LEN
 * macros give the size of the user argument each kind of command carries.
 * The macro argument is parenthesized so any expression can be passed.
 */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2274
2275 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2276         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2277         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2278         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2279         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2280         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2281         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2282         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2283         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2284         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2285         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2286         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2287 };
2288
2289 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2290                                   struct ip_vs_service_user *usvc_compat)
2291 {
2292         memset(usvc, 0, sizeof(*usvc));
2293
2294         usvc->af                = AF_INET;
2295         usvc->protocol          = usvc_compat->protocol;
2296         usvc->addr.ip           = usvc_compat->addr;
2297         usvc->port              = usvc_compat->port;
2298         usvc->fwmark            = usvc_compat->fwmark;
2299
2300         /* Deep copy of sched_name is not needed here */
2301         usvc->sched_name        = usvc_compat->sched_name;
2302
2303         usvc->flags             = usvc_compat->flags;
2304         usvc->timeout           = usvc_compat->timeout;
2305         usvc->netmask           = usvc_compat->netmask;
2306 }
2307
2308 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2309                                    struct ip_vs_dest_user *udest_compat)
2310 {
2311         memset(udest, 0, sizeof(*udest));
2312
2313         udest->addr.ip          = udest_compat->addr;
2314         udest->port             = udest_compat->port;
2315         udest->conn_flags       = udest_compat->conn_flags;
2316         udest->weight           = udest_compat->weight;
2317         udest->u_threshold      = udest_compat->u_threshold;
2318         udest->l_threshold      = udest_compat->l_threshold;
2319 }
2320
2321 static int
2322 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2323 {
2324         struct net *net = sock_net(sk);
2325         int ret;
2326         unsigned char arg[MAX_ARG_LEN];
2327         struct ip_vs_service_user *usvc_compat;
2328         struct ip_vs_service_user_kern usvc;
2329         struct ip_vs_service *svc;
2330         struct ip_vs_dest_user *udest_compat;
2331         struct ip_vs_dest_user_kern udest;
2332         struct netns_ipvs *ipvs = net_ipvs(net);
2333
2334         if (!capable(CAP_NET_ADMIN))
2335                 return -EPERM;
2336
2337         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2338                 return -EINVAL;
2339         if (len < 0 || len >  MAX_ARG_LEN)
2340                 return -EINVAL;
2341         if (len != set_arglen[SET_CMDID(cmd)]) {
2342                 pr_err("set_ctl: len %u != %u\n",
2343                        len, set_arglen[SET_CMDID(cmd)]);
2344                 return -EINVAL;
2345         }
2346
2347         if (copy_from_user(arg, user, len) != 0)
2348                 return -EFAULT;
2349
2350         /* increase the module use count */
2351         ip_vs_use_count_inc();
2352
2353         /* Handle daemons since they have another lock */
2354         if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2355             cmd == IP_VS_SO_SET_STOPDAEMON) {
2356                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2357
2358                 if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2359                         ret = -ERESTARTSYS;
2360                         goto out_dec;
2361                 }
2362                 if (cmd == IP_VS_SO_SET_STARTDAEMON)
2363                         ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2364                                                 dm->syncid);
2365                 else
2366                         ret = stop_sync_thread(net, dm->state);
2367                 mutex_unlock(&ipvs->sync_mutex);
2368                 goto out_dec;
2369         }
2370
2371         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2372                 ret = -ERESTARTSYS;
2373                 goto out_dec;
2374         }
2375
2376         if (cmd == IP_VS_SO_SET_FLUSH) {
2377                 /* Flush the virtual service */
2378                 ret = ip_vs_flush(net);
2379                 goto out_unlock;
2380         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2381                 /* Set timeout values for (tcp tcpfin udp) */
2382                 ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2383                 goto out_unlock;
2384         }
2385
2386         usvc_compat = (struct ip_vs_service_user *)arg;
2387         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2388
2389         /* We only use the new structs internally, so copy userspace compat
2390          * structs to extended internal versions */
2391         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2392         ip_vs_copy_udest_compat(&udest, udest_compat);
2393
2394         if (cmd == IP_VS_SO_SET_ZERO) {
2395                 /* if no service address is set, zero counters in all */
2396                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2397                         ret = ip_vs_zero_all(net);
2398                         goto out_unlock;
2399                 }
2400         }
2401
2402         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2403         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2404             usvc.protocol != IPPROTO_SCTP) {
2405                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2406                        usvc.protocol, &usvc.addr.ip,
2407                        ntohs(usvc.port), usvc.sched_name);
2408                 ret = -EFAULT;
2409                 goto out_unlock;
2410         }
2411
2412         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2413         if (usvc.fwmark == 0)
2414                 svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2415                                            &usvc.addr, usvc.port);
2416         else
2417                 svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2418
2419         if (cmd != IP_VS_SO_SET_ADD
2420             && (svc == NULL || svc->protocol != usvc.protocol)) {
2421                 ret = -ESRCH;
2422                 goto out_unlock;
2423         }
2424
2425         switch (cmd) {
2426         case IP_VS_SO_SET_ADD:
2427                 if (svc != NULL)
2428                         ret = -EEXIST;
2429                 else
2430                         ret = ip_vs_add_service(net, &usvc, &svc);
2431                 break;
2432         case IP_VS_SO_SET_EDIT:
2433                 ret = ip_vs_edit_service(svc, &usvc);
2434                 break;
2435         case IP_VS_SO_SET_DEL:
2436                 ret = ip_vs_del_service(svc);
2437                 if (!ret)
2438                         goto out_unlock;
2439                 break;
2440         case IP_VS_SO_SET_ZERO:
2441                 ret = ip_vs_zero_service(svc);
2442                 break;
2443         case IP_VS_SO_SET_ADDDEST:
2444                 ret = ip_vs_add_dest(svc, &udest);
2445                 break;
2446         case IP_VS_SO_SET_EDITDEST:
2447                 ret = ip_vs_edit_dest(svc, &udest);
2448                 break;
2449         case IP_VS_SO_SET_DELDEST:
2450                 ret = ip_vs_del_dest(svc, &udest);
2451                 break;
2452         default:
2453                 ret = -EINVAL;
2454         }
2455
2456   out_unlock:
2457         mutex_unlock(&__ip_vs_mutex);
2458   out_dec:
2459         /* decrease the module use count */
2460         ip_vs_use_count_dec();
2461
2462         return ret;
2463 }
2464
2465
2466 static void
2467 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2468 {
2469         dst->protocol = src->protocol;
2470         dst->addr = src->addr.ip;
2471         dst->port = src->port;
2472         dst->fwmark = src->fwmark;
2473         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2474         dst->flags = src->flags;
2475         dst->timeout = src->timeout / HZ;
2476         dst->netmask = src->netmask;
2477         dst->num_dests = src->num_dests;
2478         ip_vs_copy_stats(&dst->stats, &src->stats);
2479 }
2480
2481 static inline int
2482 __ip_vs_get_service_entries(struct net *net,
2483                             const struct ip_vs_get_services *get,
2484                             struct ip_vs_get_services __user *uptr)
2485 {
2486         int idx, count=0;
2487         struct ip_vs_service *svc;
2488         struct ip_vs_service_entry entry;
2489         int ret = 0;
2490
2491         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2492                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2493                         /* Only expose IPv4 entries to old interface */
2494                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2495                                 continue;
2496
2497                         if (count >= get->num_services)
2498                                 goto out;
2499                         memset(&entry, 0, sizeof(entry));
2500                         ip_vs_copy_service(&entry, svc);
2501                         if (copy_to_user(&uptr->entrytable[count],
2502                                          &entry, sizeof(entry))) {
2503                                 ret = -EFAULT;
2504                                 goto out;
2505                         }
2506                         count++;
2507                 }
2508         }
2509
2510         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2511                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2512                         /* Only expose IPv4 entries to old interface */
2513                         if (svc->af != AF_INET || !net_eq(svc->net, net))
2514                                 continue;
2515
2516                         if (count >= get->num_services)
2517                                 goto out;
2518                         memset(&entry, 0, sizeof(entry));
2519                         ip_vs_copy_service(&entry, svc);
2520                         if (copy_to_user(&uptr->entrytable[count],
2521                                          &entry, sizeof(entry))) {
2522                                 ret = -EFAULT;
2523                                 goto out;
2524                         }
2525                         count++;
2526                 }
2527         }
2528 out:
2529         return ret;
2530 }
2531
2532 static inline int
2533 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2534                          struct ip_vs_get_dests __user *uptr)
2535 {
2536         struct ip_vs_service *svc;
2537         union nf_inet_addr addr = { .ip = get->addr };
2538         int ret = 0;
2539
2540         if (get->fwmark)
2541                 svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2542         else
2543                 svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2544                                            get->port);
2545
2546         if (svc) {
2547                 int count = 0;
2548                 struct ip_vs_dest *dest;
2549                 struct ip_vs_dest_entry entry;
2550
2551                 list_for_each_entry(dest, &svc->destinations, n_list) {
2552                         if (count >= get->num_dests)
2553                                 break;
2554
2555                         entry.addr = dest->addr.ip;
2556                         entry.port = dest->port;
2557                         entry.conn_flags = atomic_read(&dest->conn_flags);
2558                         entry.weight = atomic_read(&dest->weight);
2559                         entry.u_threshold = dest->u_threshold;
2560                         entry.l_threshold = dest->l_threshold;
2561                         entry.activeconns = atomic_read(&dest->activeconns);
2562                         entry.inactconns = atomic_read(&dest->inactconns);
2563                         entry.persistconns = atomic_read(&dest->persistconns);
2564                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2565                         if (copy_to_user(&uptr->entrytable[count],
2566                                          &entry, sizeof(entry))) {
2567                                 ret = -EFAULT;
2568                                 break;
2569                         }
2570                         count++;
2571                 }
2572         } else
2573                 ret = -ESRCH;
2574         return ret;
2575 }
2576
2577 static inline void
2578 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2579 {
2580 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2581         struct ip_vs_proto_data *pd;
2582 #endif
2583
2584 #ifdef CONFIG_IP_VS_PROTO_TCP
2585         pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2586         u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2587         u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2588 #endif
2589 #ifdef CONFIG_IP_VS_PROTO_UDP
2590         pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2591         u->udp_timeout =
2592                         pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2593 #endif
2594 }
2595
2596
2597 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2598 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2599 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2600 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2601 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2602 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2603 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2604
2605 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2606         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2607         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2608         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2609         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2610         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2611         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2612         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2613 };
2614
2615 static int
2616 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2617 {
2618         unsigned char arg[128];
2619         int ret = 0;
2620         unsigned int copylen;
2621         struct net *net = sock_net(sk);
2622         struct netns_ipvs *ipvs = net_ipvs(net);
2623
2624         BUG_ON(!net);
2625         if (!capable(CAP_NET_ADMIN))
2626                 return -EPERM;
2627
2628         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2629                 return -EINVAL;
2630
2631         if (*len < get_arglen[GET_CMDID(cmd)]) {
2632                 pr_err("get_ctl: len %u < %u\n",
2633                        *len, get_arglen[GET_CMDID(cmd)]);
2634                 return -EINVAL;
2635         }
2636
2637         copylen = get_arglen[GET_CMDID(cmd)];
2638         if (copylen > 128)
2639                 return -EINVAL;
2640
2641         if (copy_from_user(arg, user, copylen) != 0)
2642                 return -EFAULT;
2643         /*
2644          * Handle daemons first since it has its own locking
2645          */
2646         if (cmd == IP_VS_SO_GET_DAEMON) {
2647                 struct ip_vs_daemon_user d[2];
2648
2649                 memset(&d, 0, sizeof(d));
2650                 if (mutex_lock_interruptible(&ipvs->sync_mutex))
2651                         return -ERESTARTSYS;
2652
2653                 if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2654                         d[0].state = IP_VS_STATE_MASTER;
2655                         strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2656                                 sizeof(d[0].mcast_ifn));
2657                         d[0].syncid = ipvs->master_syncid;
2658                 }
2659                 if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2660                         d[1].state = IP_VS_STATE_BACKUP;
2661                         strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2662                                 sizeof(d[1].mcast_ifn));
2663                         d[1].syncid = ipvs->backup_syncid;
2664                 }
2665                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2666                         ret = -EFAULT;
2667                 mutex_unlock(&ipvs->sync_mutex);
2668                 return ret;
2669         }
2670
2671         if (mutex_lock_interruptible(&__ip_vs_mutex))
2672                 return -ERESTARTSYS;
2673
2674         switch (cmd) {
2675         case IP_VS_SO_GET_VERSION:
2676         {
2677                 char buf[64];
2678
2679                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2680                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2681                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2682                         ret = -EFAULT;
2683                         goto out;
2684                 }
2685                 *len = strlen(buf)+1;
2686         }
2687         break;
2688
2689         case IP_VS_SO_GET_INFO:
2690         {
2691                 struct ip_vs_getinfo info;
2692                 info.version = IP_VS_VERSION_CODE;
2693                 info.size = ip_vs_conn_tab_size;
2694                 info.num_services = ipvs->num_services;
2695                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2696                         ret = -EFAULT;
2697         }
2698         break;
2699
2700         case IP_VS_SO_GET_SERVICES:
2701         {
2702                 struct ip_vs_get_services *get;
2703                 int size;
2704
2705                 get = (struct ip_vs_get_services *)arg;
2706                 size = sizeof(*get) +
2707                         sizeof(struct ip_vs_service_entry) * get->num_services;
2708                 if (*len != size) {
2709                         pr_err("length: %u != %u\n", *len, size);
2710                         ret = -EINVAL;
2711                         goto out;
2712                 }
2713                 ret = __ip_vs_get_service_entries(net, get, user);
2714         }
2715         break;
2716
2717         case IP_VS_SO_GET_SERVICE:
2718         {
2719                 struct ip_vs_service_entry *entry;
2720                 struct ip_vs_service *svc;
2721                 union nf_inet_addr addr;
2722
2723                 entry = (struct ip_vs_service_entry *)arg;
2724                 addr.ip = entry->addr;
2725                 if (entry->fwmark)
2726                         svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2727                 else
2728                         svc = __ip_vs_service_find(net, AF_INET,
2729                                                    entry->protocol, &addr,
2730                                                    entry->port);
2731                 if (svc) {
2732                         ip_vs_copy_service(entry, svc);
2733                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2734                                 ret = -EFAULT;
2735                 } else
2736                         ret = -ESRCH;
2737         }
2738         break;
2739
2740         case IP_VS_SO_GET_DESTS:
2741         {
2742                 struct ip_vs_get_dests *get;
2743                 int size;
2744
2745                 get = (struct ip_vs_get_dests *)arg;
2746                 size = sizeof(*get) +
2747                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2748                 if (*len != size) {
2749                         pr_err("length: %u != %u\n", *len, size);
2750                         ret = -EINVAL;
2751                         goto out;
2752                 }
2753                 ret = __ip_vs_get_dest_entries(net, get, user);
2754         }
2755         break;
2756
2757         case IP_VS_SO_GET_TIMEOUT:
2758         {
2759                 struct ip_vs_timeout_user t;
2760
2761                 __ip_vs_get_timeouts(net, &t);
2762                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2763                         ret = -EFAULT;
2764         }
2765         break;
2766
2767         default:
2768                 ret = -EINVAL;
2769         }
2770
2771 out:
2772         mutex_unlock(&__ip_vs_mutex);
2773         return ret;
2774 }
2775
2776
/*
 * Netfilter sockopt registration for the IPVS [gs]etsockopt interface on
 * PF_INET sockets: set commands are dispatched to do_ip_vs_set_ctl() and
 * get commands to do_ip_vs_get_ctl().
 * NOTE(review): the "+1" on both upper bounds suggests the optmax fields are
 * exclusive limits -- confirm against the nf_sockopt_ops definition.
 */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2787
2788 /*
2789  * Generic Netlink interface
2790  */
2791
2792 /* IPVS genetlink family */
2793 static struct genl_family ip_vs_genl_family = {
2794         .id             = GENL_ID_GENERATE,
2795         .hdrsize        = 0,
2796         .name           = IPVS_GENL_NAME,
2797         .version        = IPVS_GENL_VERSION,
2798         .maxattr        = IPVS_CMD_MAX,
2799         .netnsok        = true,         /* Make ipvsadm to work on netns */
2800 };
2801
/*
 * Policy used for first-level command attributes.
 * Indexed by IPVS_CMD_ATTR_*: service, destination and daemon descriptions
 * arrive as nested attribute sets, the three timeouts as plain u32 values.
 */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
2811
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON.
 * Indexed by IPVS_DAEMON_ATTR_*; the multicast interface name is a
 * NUL-terminated string limited by IP_VS_IFNAME_MAXLEN.
 */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};
2819
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE.
 * Indexed by IPVS_SVC_ATTR_*.  The address and the flags are carried as
 * binary blobs bounded by the size of union nf_inet_addr and
 * struct ip_vs_flags respectively; scheduler and persistence-engine names
 * are NUL-terminated strings.
 */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
					    .len = IP_VS_PENAME_MAXLEN },
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
2838
/*
 * Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST.
 * Indexed by IPVS_DEST_ATTR_*.  The address is a binary blob bounded by the
 * size of union nf_inet_addr; statistics are a nested attribute set.
 */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2853
2854 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2855                                  struct ip_vs_stats *stats)
2856 {
2857         struct ip_vs_stats_user ustats;
2858         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2859         if (!nl_stats)
2860                 return -EMSGSIZE;
2861
2862         ip_vs_copy_stats(&ustats, stats);
2863
2864         if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2865             nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2866             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2867             nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2868             nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2869             nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2870             nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2871             nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2872             nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2873             nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2874                 goto nla_put_failure;
2875         nla_nest_end(skb, nl_stats);
2876
2877         return 0;
2878
2879 nla_put_failure:
2880         nla_nest_cancel(skb, nl_stats);
2881         return -EMSGSIZE;
2882 }
2883
2884 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2885                                    struct ip_vs_service *svc)
2886 {
2887         struct nlattr *nl_service;
2888         struct ip_vs_flags flags = { .flags = svc->flags,
2889                                      .mask = ~0 };
2890
2891         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2892         if (!nl_service)
2893                 return -EMSGSIZE;
2894
2895         if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2896                 goto nla_put_failure;
2897         if (svc->fwmark) {
2898                 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2899                         goto nla_put_failure;
2900         } else {
2901                 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2902                     nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2903                     nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2904                         goto nla_put_failure;
2905         }
2906
2907         if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2908             (svc->pe &&
2909              nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2910             nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2911             nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2912             nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2913                 goto nla_put_failure;
2914         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2915                 goto nla_put_failure;
2916
2917         nla_nest_end(skb, nl_service);
2918
2919         return 0;
2920
2921 nla_put_failure:
2922         nla_nest_cancel(skb, nl_service);
2923         return -EMSGSIZE;
2924 }
2925
2926 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2927                                    struct ip_vs_service *svc,
2928                                    struct netlink_callback *cb)
2929 {
2930         void *hdr;
2931
2932         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2933                           &ip_vs_genl_family, NLM_F_MULTI,
2934                           IPVS_CMD_NEW_SERVICE);
2935         if (!hdr)
2936                 return -EMSGSIZE;
2937
2938         if (ip_vs_genl_fill_service(skb, svc) < 0)
2939                 goto nla_put_failure;
2940
2941         return genlmsg_end(skb, hdr);
2942
2943 nla_put_failure:
2944         genlmsg_cancel(skb, hdr);
2945         return -EMSGSIZE;
2946 }
2947
2948 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2949                                     struct netlink_callback *cb)
2950 {
2951         int idx = 0, i;
2952         int start = cb->args[0];
2953         struct ip_vs_service *svc;
2954         struct net *net = skb_sknet(skb);
2955
2956         mutex_lock(&__ip_vs_mutex);
2957         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2958                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2959                         if (++idx <= start || !net_eq(svc->net, net))
2960                                 continue;
2961                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2962                                 idx--;
2963                                 goto nla_put_failure;
2964                         }
2965                 }
2966         }
2967
2968         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2969                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2970                         if (++idx <= start || !net_eq(svc->net, net))
2971                                 continue;
2972                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2973                                 idx--;
2974                                 goto nla_put_failure;
2975                         }
2976                 }
2977         }
2978
2979 nla_put_failure:
2980         mutex_unlock(&__ip_vs_mutex);
2981         cb->args[0] = idx;
2982
2983         return skb->len;
2984 }
2985
2986 static int ip_vs_genl_parse_service(struct net *net,
2987                                     struct ip_vs_service_user_kern *usvc,
2988                                     struct nlattr *nla, int full_entry,
2989                                     struct ip_vs_service **ret_svc)
2990 {
2991         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2992         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2993         struct ip_vs_service *svc;
2994
2995         /* Parse mandatory identifying service fields first */
2996         if (nla == NULL ||
2997             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2998                 return -EINVAL;
2999
3000         nla_af          = attrs[IPVS_SVC_ATTR_AF];
3001         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
3002         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
3003         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
3004         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
3005
3006         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3007                 return -EINVAL;
3008
3009         memset(usvc, 0, sizeof(*usvc));
3010
3011         usvc->af = nla_get_u16(nla_af);
3012 #ifdef CONFIG_IP_VS_IPV6
3013         if (usvc->af != AF_INET && usvc->af != AF_INET6)
3014 #else
3015         if (usvc->af != AF_INET)
3016 #endif
3017                 return -EAFNOSUPPORT;
3018
3019         if (nla_fwmark) {
3020                 usvc->protocol = IPPROTO_TCP;
3021                 usvc->fwmark = nla_get_u32(nla_fwmark);
3022         } else {
3023                 usvc->protocol = nla_get_u16(nla_protocol);
3024                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3025                 usvc->port = nla_get_u16(nla_port);
3026                 usvc->fwmark = 0;
3027         }
3028
3029         if (usvc->fwmark)
3030                 svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3031         else
3032                 svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3033                                            &usvc->addr, usvc->port);
3034         *ret_svc = svc;
3035
3036         /* If a full entry was requested, check for the additional fields */
3037         if (full_entry) {
3038                 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3039                               *nla_netmask;
3040                 struct ip_vs_flags flags;
3041
3042                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3043                 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3044                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3045                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3046                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3047
3048                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3049                         return -EINVAL;
3050
3051                 nla_memcpy(&flags, nla_flags, sizeof(flags));
3052
3053                 /* prefill flags from service if it already exists */
3054                 if (svc)
3055                         usvc->flags = svc->flags;
3056
3057                 /* set new flags from userland */
3058                 usvc->flags = (usvc->flags & ~flags.mask) |
3059                               (flags.flags & flags.mask);
3060                 usvc->sched_name = nla_data(nla_sched);
3061                 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3062                 usvc->timeout = nla_get_u32(nla_timeout);
3063                 usvc->netmask = nla_get_u32(nla_netmask);
3064         }
3065
3066         return 0;
3067 }
3068
3069 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3070                                                      struct nlattr *nla)
3071 {
3072         struct ip_vs_service_user_kern usvc;
3073         struct ip_vs_service *svc;
3074         int ret;
3075
3076         ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3077         return ret ? ERR_PTR(ret) : svc;
3078 }
3079
3080 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3081 {
3082         struct nlattr *nl_dest;
3083
3084         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3085         if (!nl_dest)
3086                 return -EMSGSIZE;
3087
3088         if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3089             nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3090             nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3091                         (atomic_read(&dest->conn_flags) &
3092                          IP_VS_CONN_F_FWD_MASK)) ||
3093             nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3094                         atomic_read(&dest->weight)) ||
3095             nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3096             nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3097             nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3098                         atomic_read(&dest->activeconns)) ||
3099             nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3100                         atomic_read(&dest->inactconns)) ||
3101             nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3102                         atomic_read(&dest->persistconns)))
3103                 goto nla_put_failure;
3104         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3105                 goto nla_put_failure;
3106
3107         nla_nest_end(skb, nl_dest);
3108
3109         return 0;
3110
3111 nla_put_failure:
3112         nla_nest_cancel(skb, nl_dest);
3113         return -EMSGSIZE;
3114 }
3115
3116 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3117                                 struct netlink_callback *cb)
3118 {
3119         void *hdr;
3120
3121         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3122                           &ip_vs_genl_family, NLM_F_MULTI,
3123                           IPVS_CMD_NEW_DEST);
3124         if (!hdr)
3125                 return -EMSGSIZE;
3126
3127         if (ip_vs_genl_fill_dest(skb, dest) < 0)
3128                 goto nla_put_failure;
3129
3130         return genlmsg_end(skb, hdr);
3131
3132 nla_put_failure:
3133         genlmsg_cancel(skb, hdr);
3134         return -EMSGSIZE;
3135 }
3136
3137 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3138                                  struct netlink_callback *cb)
3139 {
3140         int idx = 0;
3141         int start = cb->args[0];
3142         struct ip_vs_service *svc;
3143         struct ip_vs_dest *dest;
3144         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3145         struct net *net = skb_sknet(skb);
3146
3147         mutex_lock(&__ip_vs_mutex);
3148
3149         /* Try to find the service for which to dump destinations */
3150         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3151                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3152                 goto out_err;
3153
3154
3155         svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3156         if (IS_ERR(svc) || svc == NULL)
3157                 goto out_err;
3158
3159         /* Dump the destinations */
3160         list_for_each_entry(dest, &svc->destinations, n_list) {
3161                 if (++idx <= start)
3162                         continue;
3163                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3164                         idx--;
3165                         goto nla_put_failure;
3166                 }
3167         }
3168
3169 nla_put_failure:
3170         cb->args[0] = idx;
3171
3172 out_err:
3173         mutex_unlock(&__ip_vs_mutex);
3174
3175         return skb->len;
3176 }
3177
3178 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3179                                  struct nlattr *nla, int full_entry)
3180 {
3181         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3182         struct nlattr *nla_addr, *nla_port;
3183
3184         /* Parse mandatory identifying destination fields first */
3185         if (nla == NULL ||
3186             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3187                 return -EINVAL;
3188
3189         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
3190         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
3191
3192         if (!(nla_addr && nla_port))
3193                 return -EINVAL;
3194
3195         memset(udest, 0, sizeof(*udest));
3196
3197         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3198         udest->port = nla_get_u16(nla_port);
3199
3200         /* If a full entry was requested, check for the additional fields */
3201         if (full_entry) {
3202                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3203                               *nla_l_thresh;
3204
3205                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3206                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
3207                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
3208                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
3209
3210                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3211                         return -EINVAL;
3212
3213                 udest->conn_flags = nla_get_u32(nla_fwd)
3214                                     & IP_VS_CONN_F_FWD_MASK;
3215                 udest->weight = nla_get_u32(nla_weight);
3216                 udest->u_threshold = nla_get_u32(nla_u_thresh);
3217                 udest->l_threshold = nla_get_u32(nla_l_thresh);
3218         }
3219
3220         return 0;
3221 }
3222
3223 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3224                                   const char *mcast_ifn, __be32 syncid)
3225 {
3226         struct nlattr *nl_daemon;
3227
3228         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3229         if (!nl_daemon)
3230                 return -EMSGSIZE;
3231
3232         if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3233             nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3234             nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3235                 goto nla_put_failure;
3236         nla_nest_end(skb, nl_daemon);
3237
3238         return 0;
3239
3240 nla_put_failure:
3241         nla_nest_cancel(skb, nl_daemon);
3242         return -EMSGSIZE;
3243 }
3244
3245 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3246                                   const char *mcast_ifn, __be32 syncid,
3247                                   struct netlink_callback *cb)
3248 {
3249         void *hdr;
3250         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
3251                           &ip_vs_genl_family, NLM_F_MULTI,
3252                           IPVS_CMD_NEW_DAEMON);
3253         if (!hdr)
3254                 return -EMSGSIZE;
3255
3256         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3257                 goto nla_put_failure;
3258
3259         return genlmsg_end(skb, hdr);
3260
3261 nla_put_failure:
3262         genlmsg_cancel(skb, hdr);
3263         return -EMSGSIZE;
3264 }
3265
3266 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3267                                    struct netlink_callback *cb)
3268 {
3269         struct net *net = skb_sknet(skb);
3270         struct netns_ipvs *ipvs = net_ipvs(net);
3271
3272         mutex_lock(&ipvs->sync_mutex);
3273         if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3274                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3275                                            ipvs->master_mcast_ifn,
3276                                            ipvs->master_syncid, cb) < 0)
3277                         goto nla_put_failure;
3278
3279                 cb->args[0] = 1;
3280         }
3281
3282         if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3283                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3284                                            ipvs->backup_mcast_ifn,
3285                                            ipvs->backup_syncid, cb) < 0)
3286                         goto nla_put_failure;
3287
3288                 cb->args[1] = 1;
3289         }
3290
3291 nla_put_failure:
3292         mutex_unlock(&ipvs->sync_mutex);
3293
3294         return skb->len;
3295 }
3296
3297 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3298 {
3299         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3300               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3301               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3302                 return -EINVAL;
3303
3304         return start_sync_thread(net,
3305                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3306                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3307                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3308 }
3309
3310 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3311 {
3312         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3313                 return -EINVAL;
3314
3315         return stop_sync_thread(net,
3316                                 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3317 }
3318
3319 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3320 {
3321         struct ip_vs_timeout_user t;
3322
3323         __ip_vs_get_timeouts(net, &t);
3324
3325         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3326                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3327
3328         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3329                 t.tcp_fin_timeout =
3330                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3331
3332         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3333                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3334
3335         return ip_vs_set_timeout(net, &t);
3336 }
3337
3338 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3339 {
3340         int ret = 0, cmd;
3341         struct net *net;
3342         struct netns_ipvs *ipvs;
3343
3344         net = skb_sknet(skb);
3345         ipvs = net_ipvs(net);
3346         cmd = info->genlhdr->cmd;
3347
3348         if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3349                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3350
3351                 mutex_lock(&ipvs->sync_mutex);
3352                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3353                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3354                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3355                                      ip_vs_daemon_policy)) {
3356                         ret = -EINVAL;
3357                         goto out;
3358                 }
3359
3360                 if (cmd == IPVS_CMD_NEW_DAEMON)
3361                         ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3362                 else
3363                         ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3364 out:
3365                 mutex_unlock(&ipvs->sync_mutex);
3366         }
3367         return ret;
3368 }
3369
3370 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3371 {
3372         struct ip_vs_service *svc = NULL;
3373         struct ip_vs_service_user_kern usvc;
3374         struct ip_vs_dest_user_kern udest;
3375         int ret = 0, cmd;
3376         int need_full_svc = 0, need_full_dest = 0;
3377         struct net *net;
3378
3379         net = skb_sknet(skb);
3380         cmd = info->genlhdr->cmd;
3381
3382         mutex_lock(&__ip_vs_mutex);
3383
3384         if (cmd == IPVS_CMD_FLUSH) {
3385                 ret = ip_vs_flush(net);
3386                 goto out;
3387         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3388                 ret = ip_vs_genl_set_config(net, info->attrs);
3389                 goto out;
3390         } else if (cmd == IPVS_CMD_ZERO &&
3391                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3392                 ret = ip_vs_zero_all(net);
3393                 goto out;
3394         }
3395
3396         /* All following commands require a service argument, so check if we
3397          * received a valid one. We need a full service specification when
3398          * adding / editing a service. Only identifying members otherwise. */
3399         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3400                 need_full_svc = 1;
3401
3402         ret = ip_vs_genl_parse_service(net, &usvc,
3403                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3404                                        need_full_svc, &svc);
3405         if (ret)
3406                 goto out;
3407
3408         /* Unless we're adding a new service, the service must already exist */
3409         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3410                 ret = -ESRCH;
3411                 goto out;
3412         }
3413
3414         /* Destination commands require a valid destination argument. For
3415          * adding / editing a destination, we need a full destination
3416          * specification. */
3417         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3418             cmd == IPVS_CMD_DEL_DEST) {
3419                 if (cmd != IPVS_CMD_DEL_DEST)
3420                         need_full_dest = 1;
3421
3422                 ret = ip_vs_genl_parse_dest(&udest,
3423                                             info->attrs[IPVS_CMD_ATTR_DEST],
3424                                             need_full_dest);
3425                 if (ret)
3426                         goto out;
3427         }
3428
3429         switch (cmd) {
3430         case IPVS_CMD_NEW_SERVICE:
3431                 if (svc == NULL)
3432                         ret = ip_vs_add_service(net, &usvc, &svc);
3433                 else
3434                         ret = -EEXIST;
3435                 break;
3436         case IPVS_CMD_SET_SERVICE:
3437                 ret = ip_vs_edit_service(svc, &usvc);
3438                 break;
3439         case IPVS_CMD_DEL_SERVICE:
3440                 ret = ip_vs_del_service(svc);
3441                 /* do not use svc, it can be freed */
3442                 break;
3443         case IPVS_CMD_NEW_DEST:
3444                 ret = ip_vs_add_dest(svc, &udest);
3445                 break;
3446         case IPVS_CMD_SET_DEST:
3447                 ret = ip_vs_edit_dest(svc, &udest);
3448                 break;
3449         case IPVS_CMD_DEL_DEST:
3450                 ret = ip_vs_del_dest(svc, &udest);
3451                 break;
3452         case IPVS_CMD_ZERO:
3453                 ret = ip_vs_zero_service(svc);
3454                 break;
3455         default:
3456                 ret = -EINVAL;
3457         }
3458
3459 out:
3460         mutex_unlock(&__ip_vs_mutex);
3461
3462         return ret;
3463 }
3464
3465 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3466 {
3467         struct sk_buff *msg;
3468         void *reply;
3469         int ret, cmd, reply_cmd;
3470         struct net *net;
3471
3472         net = skb_sknet(skb);
3473         cmd = info->genlhdr->cmd;
3474
3475         if (cmd == IPVS_CMD_GET_SERVICE)
3476                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3477         else if (cmd == IPVS_CMD_GET_INFO)
3478                 reply_cmd = IPVS_CMD_SET_INFO;
3479         else if (cmd == IPVS_CMD_GET_CONFIG)
3480                 reply_cmd = IPVS_CMD_SET_CONFIG;
3481         else {
3482                 pr_err("unknown Generic Netlink command\n");
3483                 return -EINVAL;
3484         }
3485
3486         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3487         if (!msg)
3488                 return -ENOMEM;
3489
3490         mutex_lock(&__ip_vs_mutex);
3491
3492         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3493         if (reply == NULL)
3494                 goto nla_put_failure;
3495
3496         switch (cmd) {
3497         case IPVS_CMD_GET_SERVICE:
3498         {
3499                 struct ip_vs_service *svc;
3500
3501                 svc = ip_vs_genl_find_service(net,
3502                                               info->attrs[IPVS_CMD_ATTR_SERVICE]);
3503                 if (IS_ERR(svc)) {
3504                         ret = PTR_ERR(svc);
3505                         goto out_err;
3506                 } else if (svc) {
3507                         ret = ip_vs_genl_fill_service(msg, svc);
3508                         if (ret)
3509                                 goto nla_put_failure;
3510                 } else {
3511                         ret = -ESRCH;
3512                         goto out_err;
3513                 }
3514
3515                 break;
3516         }
3517
3518         case IPVS_CMD_GET_CONFIG:
3519         {
3520                 struct ip_vs_timeout_user t;
3521
3522                 __ip_vs_get_timeouts(net, &t);
3523 #ifdef CONFIG_IP_VS_PROTO_TCP
3524                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3525                                 t.tcp_timeout) ||
3526                     nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3527                                 t.tcp_fin_timeout))
3528                         goto nla_put_failure;
3529 #endif
3530 #ifdef CONFIG_IP_VS_PROTO_UDP
3531                 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3532                         goto nla_put_failure;
3533 #endif
3534
3535                 break;
3536         }
3537
3538         case IPVS_CMD_GET_INFO:
3539                 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3540                                 IP_VS_VERSION_CODE) ||
3541                     nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3542                                 ip_vs_conn_tab_size))
3543                         goto nla_put_failure;
3544                 break;
3545         }
3546
3547         genlmsg_end(msg, reply);
3548         ret = genlmsg_reply(msg, info);
3549         goto out;
3550
3551 nla_put_failure:
3552         pr_err("not enough space in Netlink message\n");
3553         ret = -EMSGSIZE;
3554
3555 out_err:
3556         nlmsg_free(msg);
3557 out:
3558         mutex_unlock(&__ip_vs_mutex);
3559
3560         return ret;
3561 }
3562
3563
3564 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3565         {
3566                 .cmd    = IPVS_CMD_NEW_SERVICE,
3567                 .flags  = GENL_ADMIN_PERM,
3568                 .policy = ip_vs_cmd_policy,
3569                 .doit   = ip_vs_genl_set_cmd,
3570         },
3571         {
3572                 .cmd    = IPVS_CMD_SET_SERVICE,
3573                 .flags  = GENL_ADMIN_PERM,
3574                 .policy = ip_vs_cmd_policy,
3575                 .doit   = ip_vs_genl_set_cmd,
3576         },
3577         {
3578                 .cmd    = IPVS_CMD_DEL_SERVICE,
3579                 .flags  = GENL_ADMIN_PERM,
3580                 .policy = ip_vs_cmd_policy,
3581                 .doit   = ip_vs_genl_set_cmd,
3582         },
3583         {
3584                 .cmd    = IPVS_CMD_GET_SERVICE,
3585                 .flags  = GENL_ADMIN_PERM,
3586                 .doit   = ip_vs_genl_get_cmd,
3587                 .dumpit = ip_vs_genl_dump_services,
3588                 .policy = ip_vs_cmd_policy,
3589         },
3590         {
3591                 .cmd    = IPVS_CMD_NEW_DEST,
3592                 .flags  = GENL_ADMIN_PERM,
3593                 .policy = ip_vs_cmd_policy,
3594                 .doit   = ip_vs_genl_set_cmd,
3595         },
3596         {
3597                 .cmd    = IPVS_CMD_SET_DEST,
3598                 .flags  = GENL_ADMIN_PERM,
3599                 .policy = ip_vs_cmd_policy,
3600                 .doit   = ip_vs_genl_set_cmd,
3601         },
3602         {
3603                 .cmd    = IPVS_CMD_DEL_DEST,
3604                 .flags  = GENL_ADMIN_PERM,
3605                 .policy = ip_vs_cmd_policy,
3606                 .doit   = ip_vs_genl_set_cmd,
3607         },
3608         {
3609                 .cmd    = IPVS_CMD_GET_DEST,
3610                 .flags  = GENL_ADMIN_PERM,
3611                 .policy = ip_vs_cmd_policy,
3612                 .dumpit = ip_vs_genl_dump_dests,
3613         },
3614         {
3615                 .cmd    = IPVS_CMD_NEW_DAEMON,
3616                 .flags  = GENL_ADMIN_PERM,
3617                 .policy = ip_vs_cmd_policy,
3618                 .doit   = ip_vs_genl_set_daemon,
3619         },
3620         {
3621                 .cmd    = IPVS_CMD_DEL_DAEMON,
3622                 .flags  = GENL_ADMIN_PERM,
3623                 .policy = ip_vs_cmd_policy,
3624                 .doit   = ip_vs_genl_set_daemon,
3625         },
3626         {
3627                 .cmd    = IPVS_CMD_GET_DAEMON,
3628                 .flags  = GENL_ADMIN_PERM,
3629                 .dumpit = ip_vs_genl_dump_daemons,
3630         },
3631         {
3632                 .cmd    = IPVS_CMD_SET_CONFIG,
3633                 .flags  = GENL_ADMIN_PERM,
3634                 .policy = ip_vs_cmd_policy,
3635                 .doit   = ip_vs_genl_set_cmd,
3636         },
3637         {
3638                 .cmd    = IPVS_CMD_GET_CONFIG,
3639                 .flags  = GENL_ADMIN_PERM,
3640                 .doit   = ip_vs_genl_get_cmd,
3641         },
3642         {
3643                 .cmd    = IPVS_CMD_GET_INFO,
3644                 .flags  = GENL_ADMIN_PERM,
3645                 .doit   = ip_vs_genl_get_cmd,
3646         },
3647         {
3648                 .cmd    = IPVS_CMD_ZERO,
3649                 .flags  = GENL_ADMIN_PERM,
3650                 .policy = ip_vs_cmd_policy,
3651                 .doit   = ip_vs_genl_set_cmd,
3652         },
3653         {
3654                 .cmd    = IPVS_CMD_FLUSH,
3655                 .flags  = GENL_ADMIN_PERM,
3656                 .doit   = ip_vs_genl_set_cmd,
3657         },
3658 };
3659
3660 static int __init ip_vs_genl_register(void)
3661 {
3662         return genl_register_family_with_ops(&ip_vs_genl_family,
3663                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3664 }
3665
3666 static void ip_vs_genl_unregister(void)
3667 {
3668         genl_unregister_family(&ip_vs_genl_family);
3669 }
3670
3671 /* End of Generic Netlink interface definitions */
3672
3673 /*
3674  * Per-netns init/exit functions.
3675  */
3676 #ifdef CONFIG_SYSCTL
3677 int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3678 {
3679         int idx;
3680         struct netns_ipvs *ipvs = net_ipvs(net);
3681         struct ctl_table *tbl;
3682
3683         atomic_set(&ipvs->dropentry, 0);
3684         spin_lock_init(&ipvs->dropentry_lock);
3685         spin_lock_init(&ipvs->droppacket_lock);
3686         spin_lock_init(&ipvs->securetcp_lock);
3687
3688         if (!net_eq(net, &init_net)) {
3689                 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3690                 if (tbl == NULL)
3691                         return -ENOMEM;
3692         } else
3693                 tbl = vs_vars;
3694         /* Initialize sysctl defaults */
3695         idx = 0;
3696         ipvs->sysctl_amemthresh = 1024;
3697         tbl[idx++].data = &ipvs->sysctl_amemthresh;
3698         ipvs->sysctl_am_droprate = 10;
3699         tbl[idx++].data = &ipvs->sysctl_am_droprate;
3700         tbl[idx++].data = &ipvs->sysctl_drop_entry;
3701         tbl[idx++].data = &ipvs->sysctl_drop_packet;
3702 #ifdef CONFIG_IP_VS_NFCT
3703         tbl[idx++].data = &ipvs->sysctl_conntrack;
3704 #endif
3705         tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3706         ipvs->sysctl_snat_reroute = 1;
3707         tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3708         ipvs->sysctl_sync_ver = 1;
3709         tbl[idx++].data = &ipvs->sysctl_sync_ver;
3710         ipvs->sysctl_sync_ports = 1;
3711         tbl[idx++].data = &ipvs->sysctl_sync_ports;
3712         ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32;
3713         tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3714         ipvs->sysctl_sync_sock_size = 0;
3715         tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3716         tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3717         tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3718         tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
3719         ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
3720         ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
3721         tbl[idx].data = &ipvs->sysctl_sync_threshold;
3722         tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
3723         ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
3724         tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3725         ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3726         tbl[idx++].data = &ipvs->sysctl_sync_retries;
3727         tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3728
3729
3730         ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3731         if (ipvs->sysctl_hdr == NULL) {
3732                 if (!net_eq(net, &init_net))
3733                         kfree(tbl);
3734                 return -ENOMEM;
3735         }
3736         ip_vs_start_estimator(net, &ipvs->tot_stats);
3737         ipvs->sysctl_tbl = tbl;
3738         /* Schedule defense work */
3739         INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3740         schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3741
3742         return 0;
3743 }
3744
3745 void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3746 {
3747         struct netns_ipvs *ipvs = net_ipvs(net);
3748
3749         cancel_delayed_work_sync(&ipvs->defense_work);
3750         cancel_work_sync(&ipvs->defense_work.work);
3751         unregister_net_sysctl_table(ipvs->sysctl_hdr);
3752 }
3753
3754 #else
3755
3756 int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3757 void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3758
3759 #endif
3760
3761 static struct notifier_block ip_vs_dst_notifier = {
3762         .notifier_call = ip_vs_dst_event,
3763 };
3764
3765 int __net_init ip_vs_control_net_init(struct net *net)
3766 {
3767         int idx;
3768         struct netns_ipvs *ipvs = net_ipvs(net);
3769
3770         rwlock_init(&ipvs->rs_lock);
3771
3772         /* Initialize rs_table */
3773         for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3774                 INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3775
3776         INIT_LIST_HEAD(&ipvs->dest_trash);
3777         atomic_set(&ipvs->ftpsvc_counter, 0);
3778         atomic_set(&ipvs->nullsvc_counter, 0);
3779
3780         /* procfs stats */
3781         ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3782         if (!ipvs->tot_stats.cpustats)
3783                 return -ENOMEM;
3784
3785         spin_lock_init(&ipvs->tot_stats.lock);
3786
3787         proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3788         proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3789         proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3790                              &ip_vs_stats_percpu_fops);
3791
3792         if (ip_vs_control_net_init_sysctl(net))
3793                 goto err;
3794
3795         return 0;
3796
3797 err:
3798         free_percpu(ipvs->tot_stats.cpustats);
3799         return -ENOMEM;
3800 }
3801
3802 void __net_exit ip_vs_control_net_cleanup(struct net *net)
3803 {
3804         struct netns_ipvs *ipvs = net_ipvs(net);
3805
3806         ip_vs_trash_cleanup(net);
3807         ip_vs_stop_estimator(net, &ipvs->tot_stats);
3808         ip_vs_control_net_cleanup_sysctl(net);
3809         proc_net_remove(net, "ip_vs_stats_percpu");
3810         proc_net_remove(net, "ip_vs_stats");
3811         proc_net_remove(net, "ip_vs");
3812         free_percpu(ipvs->tot_stats.cpustats);
3813 }
3814
3815 int __init ip_vs_register_nl_ioctl(void)
3816 {
3817         int ret;
3818
3819         ret = nf_register_sockopt(&ip_vs_sockopts);
3820         if (ret) {
3821                 pr_err("cannot register sockopt.\n");
3822                 goto err_sock;
3823         }
3824
3825         ret = ip_vs_genl_register();
3826         if (ret) {
3827                 pr_err("cannot register Generic Netlink interface.\n");
3828                 goto err_genl;
3829         }
3830         return 0;
3831
3832 err_genl:
3833         nf_unregister_sockopt(&ip_vs_sockopts);
3834 err_sock:
3835         return ret;
3836 }
3837
3838 void ip_vs_unregister_nl_ioctl(void)
3839 {
3840         ip_vs_genl_unregister();
3841         nf_unregister_sockopt(&ip_vs_sockopts);
3842 }
3843
3844 int __init ip_vs_control_init(void)
3845 {
3846         int idx;
3847         int ret;
3848
3849         EnterFunction(2);
3850
3851         /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
3852         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3853                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3854                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3855         }
3856
3857         smp_wmb();      /* Do we really need it now ? */
3858
3859         ret = register_netdevice_notifier(&ip_vs_dst_notifier);
3860         if (ret < 0)
3861                 return ret;
3862
3863         LeaveFunction(2);
3864         return 0;
3865 }
3866
3867
3868 void ip_vs_control_cleanup(void)
3869 {
3870         EnterFunction(2);
3871         unregister_netdevice_notifier(&ip_vs_dst_notifier);
3872         LeaveFunction(2);
3873 }