netfilter: xt_qtaguid: extend iface stat to report protocols
firefly-linux-kernel-4.4.55.git: net/netfilter/xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
25 #include <net/sock.h>
26 #include <net/tcp.h>
27 #include <net/udp.h>
28
29 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
30 #include <linux/netfilter_ipv6/ip6_tables.h>
31 #endif
32
33 #include <linux/netfilter/xt_socket.h>
34 #include "xt_qtaguid_internal.h"
35 #include "xt_qtaguid_print.h"
36
37 /*
38  * We only use the xt_socket funcs within a similar context (the hooks
39  * listed below) to avoid unexpected return values.
40  */
41 #define XT_SOCKET_SUPPORTED_HOOKS \
42         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
43
44
45 static const char *module_procdirname = "xt_qtaguid";
46 static struct proc_dir_entry *xt_qtaguid_procdir;
47
48 static unsigned int proc_iface_perms = S_IRUGO;
49 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
50
51 static struct proc_dir_entry *xt_qtaguid_stats_file;
52 static unsigned int proc_stats_perms = S_IRUGO;
53 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
54
55 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
56
57 /* Everybody can write. But proc_ctrl_write_limited is true by default, which
58  * limits what can be controlled. See the can_*() functions.
59  */
60 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
61 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
62
63 /* Limited by default, so the gid of the ctrl and stats proc entries
64  * will limit what can be done. See the can_*() functions.
65  */
66 static bool proc_stats_readall_limited = true;
67 static bool proc_ctrl_write_limited = true;
68
69 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
70                    S_IRUGO | S_IWUSR);
71 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
72                    S_IRUGO | S_IWUSR);
73
74 /*
75  * Limit the number of active tags (via socket tags) for a given UID.
76  * Multiple processes could share the UID.
77  */
78 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
79 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
80
81 /*
82  * After the kernel has initialized this module, it is still possible
83  * to make it passive.
84  * Setting passive to Y:
85  *  - the iface stats handling will not act on notifications.
86  *  - iptables matches will never match.
87  *  - ctrl commands silently succeed.
88  *  - stats are always empty.
89  * This is mostly useful when a bug is suspected.
90  */
91 static bool module_passive;
92 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
93
94 /*
95  * Control how qtaguid data is tracked per proc/uid.
96  * Setting tag_tracking_passive to Y:
97  *  - don't create proc specific structs to track tags
98  *  - don't check that active tag stats exceed some limits.
99  *  - don't clean up socket tags on process exits.
100  * This is mostly useful when a bug is suspected.
101  */
102 static bool qtu_proc_handling_passive;
103 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
104                    S_IRUGO | S_IWUSR);
105
106 #define QTU_DEV_NAME "xt_qtaguid"
107
108 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
109 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
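/*
 * Illustrative note (a sketch, not part of the original source): parameters
 * declared above with S_IWUSR are normally also writable at run time via
 * sysfs, e.g.:
 *
 *   echo Y    > /sys/module/xt_qtaguid/parameters/passive
 *   echo 0xff > /sys/module/xt_qtaguid/parameters/debug_mask
 *
 * assuming the module is built as xt_qtaguid and sysfs is mounted.
 */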
110
111 /*---------------------------------------------------------------------------*/
112 static const char *iface_stat_procdirname = "iface_stat";
113 static struct proc_dir_entry *iface_stat_procdir;
114 /*
115  * The iface_stat_all* will go away once userspace gets used to the new fields
116  * that have a format line.
117  */
118 static const char *iface_stat_all_procfilename = "iface_stat_all";
119 static struct proc_dir_entry *iface_stat_all_procfile;
120 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
121 static struct proc_dir_entry *iface_stat_fmt_procfile;
122
123
124 /*
125  * Ordering of locks:
126  *  outer locks:
127  *    iface_stat_list_lock
128  *    sock_tag_list_lock
129  *  inner locks:
130  *    uid_tag_data_tree_lock
131  *    tag_counter_set_list_lock
132  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
133  * is acquired.
134  *
135  * Call tree with all lock holders as of 2012-04-27:
136  *
137  * iface_stat_fmt_proc_read()
138  *   iface_stat_list_lock
139  *     (struct iface_stat)
140  *
141  * qtaguid_ctrl_proc_read()
142  *   sock_tag_list_lock
143  *     (sock_tag_tree)
144  *     (struct proc_qtu_data->sock_tag_list)
145  *   prdebug_full_state()
146  *     sock_tag_list_lock
147  *       (sock_tag_tree)
148  *     uid_tag_data_tree_lock
149  *       (uid_tag_data_tree)
150  *       (proc_qtu_data_tree)
151  *     iface_stat_list_lock
152  *
153  * qtaguid_stats_proc_read()
154  *   iface_stat_list_lock
155  *     struct iface_stat->tag_stat_list_lock
156  *
157  * qtudev_open()
158  *   uid_tag_data_tree_lock
159  *
160  * qtudev_release()
161  *   sock_tag_data_list_lock
162  *     uid_tag_data_tree_lock
163  *   prdebug_full_state()
164  *     sock_tag_list_lock
165  *     uid_tag_data_tree_lock
166  *     iface_stat_list_lock
167  *
168  * iface_netdev_event_handler()
169  *   iface_stat_create()
170  *     iface_stat_list_lock
171  *   iface_stat_update()
172  *     iface_stat_list_lock
173  *
174  * iface_inetaddr_event_handler()
175  *   iface_stat_create()
176  *     iface_stat_list_lock
177  *   iface_stat_update()
178  *     iface_stat_list_lock
179  *
180  * iface_inet6addr_event_handler()
181  *   iface_stat_create_ipv6()
182  *     iface_stat_list_lock
183  *   iface_stat_update()
184  *     iface_stat_list_lock
185  *
186  * qtaguid_mt()
187  *   account_for_uid()
188  *     if_tag_stat_update()
189  *       get_sock_stat()
190  *         sock_tag_list_lock
191  *       struct iface_stat->tag_stat_list_lock
192  *         tag_stat_update()
193  *           get_active_counter_set()
194  *             tag_counter_set_list_lock
195  *         tag_stat_update()
196  *           get_active_counter_set()
197  *             tag_counter_set_list_lock
198  *
199  *
200  * qtaguid_ctrl_parse()
201  *   ctrl_cmd_delete()
202  *     sock_tag_list_lock
203  *     tag_counter_set_list_lock
204  *     iface_stat_list_lock
205  *       struct iface_stat->tag_stat_list_lock
206  *     uid_tag_data_tree_lock
207  *   ctrl_cmd_counter_set()
208  *     tag_counter_set_list_lock
209  *   ctrl_cmd_tag()
210  *     sock_tag_list_lock
211  *       (sock_tag_tree)
212  *       get_tag_ref()
213  *         uid_tag_data_tree_lock
214  *           (uid_tag_data_tree)
215  *       uid_tag_data_tree_lock
216  *         (proc_qtu_data_tree)
217  *   ctrl_cmd_untag()
218  *     sock_tag_list_lock
219  *     uid_tag_data_tree_lock
220  *
221  */
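/*
 * Illustrative sketch of the nesting convention above (cf.
 * qtaguid_stats_proc_read() in the call tree): the outer list lock is
 * taken before the per-interface lock, roughly:
 *
 *   spin_lock_bh(&iface_stat_list_lock);
 *   spin_lock_bh(&iface_entry->tag_stat_list_lock);
 *   ... walk iface_entry->tag_stat_tree ...
 *   spin_unlock_bh(&iface_entry->tag_stat_list_lock);
 *   spin_unlock_bh(&iface_stat_list_lock);
 *
 * Acquiring them in the opposite order anywhere would invert the documented
 * ordering and risk an ABBA deadlock.
 */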
222 static LIST_HEAD(iface_stat_list);
223 static DEFINE_SPINLOCK(iface_stat_list_lock);
224
225 static struct rb_root sock_tag_tree = RB_ROOT;
226 static DEFINE_SPINLOCK(sock_tag_list_lock);
227
228 static struct rb_root tag_counter_set_tree = RB_ROOT;
229 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
230
231 static struct rb_root uid_tag_data_tree = RB_ROOT;
232 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
233
234 static struct rb_root proc_qtu_data_tree = RB_ROOT;
235 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
236
237 static struct qtaguid_event_counts qtu_events;
238 /*----------------------------------------------*/
239 static bool can_manipulate_uids(void)
240 {
241         /* root pwnd */
242         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
243                 || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
244                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
245 }
246
247 static bool can_impersonate_uid(uid_t uid)
248 {
249         return uid == current_fsuid() || can_manipulate_uids();
250 }
251
252 static bool can_read_other_uid_stats(uid_t uid)
253 {
254         /* root pwnd */
255         return in_egroup_p(xt_qtaguid_stats_file->gid)
256                 || unlikely(!current_fsuid()) || uid == current_fsuid()
257                 || unlikely(!proc_stats_readall_limited)
258                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
259 }
260
261 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
262                                   enum ifs_tx_rx direction,
263                                   enum ifs_proto ifs_proto,
264                                   int bytes,
265                                   int packets)
266 {
267         counters->bpc[set][direction][ifs_proto].bytes += bytes;
268         counters->bpc[set][direction][ifs_proto].packets += packets;
269 }
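/*
 * Example (illustrative): accounting one received 1500-byte TCP segment
 * against counter set 0 would be
 *
 *   dc_add_byte_packets(cnts, 0, IFS_RX, IFS_TCP, 1500, 1);
 *
 * which bumps cnts->bpc[0][IFS_RX][IFS_TCP].bytes by 1500 and
 * .packets by 1.
 */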
270
271 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
272 {
273         struct rb_node *node = root->rb_node;
274
275         while (node) {
276                 struct tag_node *data = rb_entry(node, struct tag_node, node);
277                 int result;
278                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
279                          " node=%p data=%p\n", tag, node, data);
280                 result = tag_compare(tag, data->tag);
281                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
282                          " data.tag=0x%llx (uid=%u) res=%d\n",
283                          tag, data->tag, get_uid_from_tag(data->tag), result);
284                 if (result < 0)
285                         node = node->rb_left;
286                 else if (result > 0)
287                         node = node->rb_right;
288                 else
289                         return data;
290         }
291         return NULL;
292 }
293
294 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
295 {
296         struct rb_node **new = &(root->rb_node), *parent = NULL;
297
298         /* Figure out where to put new node */
299         while (*new) {
300                 struct tag_node *this = rb_entry(*new, struct tag_node,
301                                                  node);
302                 int result = tag_compare(data->tag, this->tag);
303                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
304                          " (uid=%u)\n", __func__,
305                          this->tag,
306                          get_uid_from_tag(this->tag));
307                 parent = *new;
308                 if (result < 0)
309                         new = &((*new)->rb_left);
310                 else if (result > 0)
311                         new = &((*new)->rb_right);
312                 else
313                         BUG();
314         }
315
316         /* Add new node and rebalance tree. */
317         rb_link_node(&data->node, parent, new);
318         rb_insert_color(&data->node, root);
319 }
320
321 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
322 {
323         tag_node_tree_insert(&data->tn, root);
324 }
325
326 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
327 {
328         struct tag_node *node = tag_node_tree_search(root, tag);
329         if (!node)
330                 return NULL;
331         return rb_entry(&node->node, struct tag_stat, tn.node);
332 }
333
334 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
335                                         struct rb_root *root)
336 {
337         tag_node_tree_insert(&data->tn, root);
338 }
339
340 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
341                                                            tag_t tag)
342 {
343         struct tag_node *node = tag_node_tree_search(root, tag);
344         if (!node)
345                 return NULL;
346         return rb_entry(&node->node, struct tag_counter_set, tn.node);
347
348 }
349
350 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
351 {
352         tag_node_tree_insert(&data->tn, root);
353 }
354
355 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
356 {
357         struct tag_node *node = tag_node_tree_search(root, tag);
358         if (!node)
359                 return NULL;
360         return rb_entry(&node->node, struct tag_ref, tn.node);
361 }
362
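/*
 * The sock_tag trees are keyed by the struct sock pointer itself; the
 * search and insert helpers below compare the raw pointer values.
 */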
363 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
364                                              const struct sock *sk)
365 {
366         struct rb_node *node = root->rb_node;
367
368         while (node) {
369                 struct sock_tag *data = rb_entry(node, struct sock_tag,
370                                                  sock_node);
371                 if (sk < data->sk)
372                         node = node->rb_left;
373                 else if (sk > data->sk)
374                         node = node->rb_right;
375                 else
376                         return data;
377         }
378         return NULL;
379 }
380
381 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
382 {
383         struct rb_node **new = &(root->rb_node), *parent = NULL;
384
385         /* Figure out where to put new node */
386         while (*new) {
387                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
388                                                  sock_node);
389                 parent = *new;
390                 if (data->sk < this->sk)
391                         new = &((*new)->rb_left);
392                 else if (data->sk > this->sk)
393                         new = &((*new)->rb_right);
394                 else
395                         BUG();
396         }
397
398         /* Add new node and rebalance tree. */
399         rb_link_node(&data->sock_node, parent, new);
400         rb_insert_color(&data->sock_node, root);
401 }
402
403 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
404 {
405         struct rb_node *node;
406         struct sock_tag *st_entry;
407
408         node = rb_first(st_to_free_tree);
409         while (node) {
410                 st_entry = rb_entry(node, struct sock_tag, sock_node);
411                 node = rb_next(node);
412                 CT_DEBUG("qtaguid: %s(): "
413                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
414                          st_entry->sk,
415                          st_entry->tag,
416                          get_uid_from_tag(st_entry->tag));
417                 rb_erase(&st_entry->sock_node, st_to_free_tree);
418                 sockfd_put(st_entry->socket);
419                 kfree(st_entry);
420         }
421 }
422
423 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
424                                                        const pid_t pid)
425 {
426         struct rb_node *node = root->rb_node;
427
428         while (node) {
429                 struct proc_qtu_data *data = rb_entry(node,
430                                                       struct proc_qtu_data,
431                                                       node);
432                 if (pid < data->pid)
433                         node = node->rb_left;
434                 else if (pid > data->pid)
435                         node = node->rb_right;
436                 else
437                         return data;
438         }
439         return NULL;
440 }
441
442 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
443                                       struct rb_root *root)
444 {
445         struct rb_node **new = &(root->rb_node), *parent = NULL;
446
447         /* Figure out where to put new node */
448         while (*new) {
449                 struct proc_qtu_data *this = rb_entry(*new,
450                                                       struct proc_qtu_data,
451                                                       node);
452                 parent = *new;
453                 if (data->pid < this->pid)
454                         new = &((*new)->rb_left);
455                 else if (data->pid > this->pid)
456                         new = &((*new)->rb_right);
457                 else
458                         BUG();
459         }
460
461         /* Add new node and rebalance tree. */
462         rb_link_node(&data->node, parent, new);
463         rb_insert_color(&data->node, root);
464 }
465
466 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
467                                      struct rb_root *root)
468 {
469         struct rb_node **new = &(root->rb_node), *parent = NULL;
470
471         /* Figure out where to put new node */
472         while (*new) {
473                 struct uid_tag_data *this = rb_entry(*new,
474                                                      struct uid_tag_data,
475                                                      node);
476                 parent = *new;
477                 if (data->uid < this->uid)
478                         new = &((*new)->rb_left);
479                 else if (data->uid > this->uid)
480                         new = &((*new)->rb_right);
481                 else
482                         BUG();
483         }
484
485         /* Add new node and rebalance tree. */
486         rb_link_node(&data->node, parent, new);
487         rb_insert_color(&data->node, root);
488 }
489
490 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
491                                                      uid_t uid)
492 {
493         struct rb_node *node = root->rb_node;
494
495         while (node) {
496                 struct uid_tag_data *data = rb_entry(node,
497                                                      struct uid_tag_data,
498                                                      node);
499                 if (uid < data->uid)
500                         node = node->rb_left;
501                 else if (uid > data->uid)
502                         node = node->rb_right;
503                 else
504                         return data;
505         }
506         return NULL;
507 }
508
509 /*
510  * Allocates a new uid_tag_data struct if needed.
511  * Returns a pointer to the found or allocated uid_tag_data.
512  * Returns a PTR_ERR on failures, and lock is not held.
513  * If found_res is not NULL:
514  *   sets *found_res to true if the entry already existed (not allocated).
515  *   sets *found_res to false if a new entry was allocated.
516  */
517 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
518 {
519         struct uid_tag_data *utd_entry;
520
521         /* Look for top level uid_tag_data for the UID */
522         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
523         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
524
525         if (found_res)
526                 *found_res = utd_entry;
527         if (utd_entry)
528                 return utd_entry;
529
530         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
531         if (!utd_entry) {
532                 pr_err("qtaguid: get_uid_data(%u): "
533                        "tag data alloc failed\n", uid);
534                 return ERR_PTR(-ENOMEM);
535         }
536
537         utd_entry->uid = uid;
538         utd_entry->tag_ref_tree = RB_ROOT;
539         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
540         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
541         return utd_entry;
542 }
543
544 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
545 static struct tag_ref *new_tag_ref(tag_t new_tag,
546                                    struct uid_tag_data *utd_entry)
547 {
548         struct tag_ref *tr_entry;
549         int res;
550
551         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
552                 pr_info("qtaguid: new_tag_ref(0x%llx): "
553                         "tag ref alloc quota exceeded. max=%d\n",
554                         new_tag, max_sock_tags);
555                 res = -EMFILE;
556                 goto err_res;
557
558         }
559
560         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
561         if (!tr_entry) {
562                 pr_err("qtaguid: new_tag_ref(0x%llx): "
563                        "tag ref alloc failed\n",
564                        new_tag);
565                 res = -ENOMEM;
566                 goto err_res;
567         }
568         tr_entry->tn.tag = new_tag;
569         /* tr_entry->num_sock_tags  handled by caller */
570         utd_entry->num_active_tags++;
571         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
572         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
573                  " inserted new tag ref %p\n",
574                  new_tag, tr_entry);
575         return tr_entry;
576
577 err_res:
578         return ERR_PTR(res);
579 }
580
581 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
582                                       struct uid_tag_data **utd_res)
583 {
584         struct uid_tag_data *utd_entry;
585         struct tag_ref *tr_entry;
586         bool found_utd;
587         uid_t uid = get_uid_from_tag(full_tag);
588
589         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
590                  full_tag, uid);
591
592         utd_entry = get_uid_data(uid, &found_utd);
593         if (IS_ERR_OR_NULL(utd_entry)) {
594                 if (utd_res)
595                         *utd_res = utd_entry;
596                 return NULL;
597         }
598
599         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
600         if (utd_res)
601                 *utd_res = utd_entry;
602         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
603                  full_tag, utd_entry, tr_entry);
604         return tr_entry;
605 }
606
607 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
608 static struct tag_ref *get_tag_ref(tag_t full_tag,
609                                    struct uid_tag_data **utd_res)
610 {
611         struct uid_tag_data *utd_entry;
612         struct tag_ref *tr_entry;
613
614         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
615                  full_tag);
616         spin_lock_bh(&uid_tag_data_tree_lock);
617         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
618         BUG_ON(IS_ERR_OR_NULL(utd_entry));
619         if (!tr_entry)
620                 tr_entry = new_tag_ref(full_tag, utd_entry);
621
622         spin_unlock_bh(&uid_tag_data_tree_lock);
623         if (utd_res)
624                 *utd_res = utd_entry;
625         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
626                  full_tag, utd_entry, tr_entry);
627         return tr_entry;
628 }
629
630 /* Checks and maybe frees the UID Tag Data entry */
631 static void put_utd_entry(struct uid_tag_data *utd_entry)
632 {
633         /* Are we done with the UID tag data entry? */
634         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
635                 !utd_entry->num_pqd) {
636                 DR_DEBUG("qtaguid: %s(): "
637                          "erase utd_entry=%p uid=%u "
638                          "by pid=%u tgid=%u uid=%u\n", __func__,
639                          utd_entry, utd_entry->uid,
640                          current->pid, current->tgid, current_fsuid());
641                 BUG_ON(utd_entry->num_active_tags);
642                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
643                 kfree(utd_entry);
644         } else {
645                 DR_DEBUG("qtaguid: %s(): "
646                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
647                          __func__, utd_entry, utd_entry->num_active_tags,
648                          utd_entry->num_pqd);
649                 BUG_ON(!(utd_entry->num_active_tags ||
650                          utd_entry->num_pqd));
651         }
652 }
653
654 /*
655  * If no sock_tags are using this tag_ref,
656  * decrements refcount of utd_entry, removes tr_entry
657  * from utd_entry->tag_ref_tree and frees.
658  */
659 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
660                                         struct uid_tag_data *utd_entry)
661 {
662         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
663                  tr_entry, tr_entry->tn.tag,
664                  get_uid_from_tag(tr_entry->tn.tag));
665         if (!tr_entry->num_sock_tags) {
666                 BUG_ON(!utd_entry->num_active_tags);
667                 utd_entry->num_active_tags--;
668                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
669                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
670                 kfree(tr_entry);
671         }
672 }
673
674 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
675 {
676         struct rb_node *node;
677         struct tag_ref *tr_entry;
678         tag_t acct_tag;
679
680         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
681                  full_tag, get_uid_from_tag(full_tag));
682         acct_tag = get_atag_from_tag(full_tag);
683         node = rb_first(&utd_entry->tag_ref_tree);
684         while (node) {
685                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
686                 node = rb_next(node);
687                 if (!acct_tag || tr_entry->tn.tag == full_tag)
688                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
689         }
690 }
691
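/*
 * read_proc_u64() and read_proc_bool() follow the legacy
 * create_proc_read_entry() callback convention: format into the supplied
 * page buffer, set *eof once everything fits in 'count', and return the
 * number of bytes available starting at 'off'.
 */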
692 static int read_proc_u64(char *page, char **start, off_t off,
693                         int count, int *eof, void *data)
694 {
695         int len;
696         uint64_t value;
697         char *p = page;
698         uint64_t *iface_entry = data;
699
700         if (!data)
701                 return 0;
702
703         value = *iface_entry;
704         p += sprintf(p, "%llu\n", value);
705         len = (p - page) - off;
706         *eof = (len <= count) ? 1 : 0;
707         *start = page + off;
708         return len;
709 }
710
711 static int read_proc_bool(char *page, char **start, off_t off,
712                         int count, int *eof, void *data)
713 {
714         int len;
715         bool value;
716         char *p = page;
717         bool *bool_entry = data;
718
719         if (!data)
720                 return 0;
721
722         value = *bool_entry;
723         p += sprintf(p, "%u\n", value);
724         len = (p - page) - off;
725         *eof = (len <= count) ? 1 : 0;
726         *start = page + off;
727         return len;
728 }
729
730 static int get_active_counter_set(tag_t tag)
731 {
732         int active_set = 0;
733         struct tag_counter_set *tcs;
734
735         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
736                  " (uid=%u)\n",
737                  tag, get_uid_from_tag(tag));
738         /* For now we only handle UID tags for active sets */
739         tag = get_utag_from_tag(tag);
740         spin_lock_bh(&tag_counter_set_list_lock);
741         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
742         if (tcs)
743                 active_set = tcs->active_set;
744         spin_unlock_bh(&tag_counter_set_list_lock);
745         return active_set;
746 }
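/*
 * Note (illustrative): the value returned above indexes the first
 * dimension of data_counters->bpc[set][direction][proto]. Per-UID sets
 * are switched from userspace through the ctrl interface (see
 * ctrl_cmd_counter_set() in the call tree above), e.g. to split
 * foreground from background traffic.
 */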
747
748 /*
749  * Find the entry for tracking the specified interface.
750  * Caller must hold iface_stat_list_lock
751  */
752 static struct iface_stat *get_iface_entry(const char *ifname)
753 {
754         struct iface_stat *iface_entry;
755
756         /* Look up the entry tracking the specified interface */
757         if (ifname == NULL) {
758                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
759                 return NULL;
760         }
761
762         /* Iterate over interfaces */
763         list_for_each_entry(iface_entry, &iface_stat_list, list) {
764                 if (!strcmp(ifname, iface_entry->ifname))
765                         goto done;
766         }
767         iface_entry = NULL;
768 done:
769         return iface_entry;
770 }
771
772 /* This is for fmt2 only */
773 static int pp_iface_stat_line(bool header, char *outp,
774                               int char_count, struct iface_stat *iface_entry)
775 {
776         int len;
777         if (header) {
778                 len = snprintf(outp, char_count,
779                                "ifname "
780                                "total_skb_rx_bytes total_skb_rx_packets "
781                                "total_skb_tx_bytes total_skb_tx_packets "
782                                "rx_tcp_bytes rx_tcp_packets "
783                                "rx_udp_bytes rx_udp_packets "
784                                "rx_other_bytes rx_other_packets "
785                                "tx_tcp_bytes tx_tcp_packets "
786                                "tx_udp_bytes tx_udp_packets "
787                                "tx_other_bytes tx_other_packets\n"
788                         );
789         } else {
790                 struct data_counters *cnts;
791                 int cnt_set = 0;   /* We only use one set for the device */
792                 cnts = &iface_entry->totals_via_skb;
793                 len = snprintf(
794                         outp, char_count,
795                         "%s "
796                         "%llu %llu %llu %llu %llu %llu %llu %llu "
797                         "%llu %llu %llu %llu %llu %llu %llu %llu\n",
798                         iface_entry->ifname,
799                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
800                         dc_sum_packets(cnts, cnt_set, IFS_RX),
801                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
802                         dc_sum_packets(cnts, cnt_set, IFS_TX),
803                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
804                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
805                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
806                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
807                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
808                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
809                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
810                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
811                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
812                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
813                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
814                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
815         }
816         return len;
817 }
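/*
 * Example data line for fmt2 (illustrative values, matching the header
 * above):
 *
 *   wlan0 1024 10 2048 20 600 5 200 3 224 2 1500 12 400 6 148 2
 *
 * i.e. ifname, the skb-based rx/tx byte and packet totals, then the
 * rx and tx tcp/udp/other byte and packet counters.
 */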
818
819 static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
820                                     off_t items_to_skip, int char_count,
821                                     int *eof, void *data)
822 {
823         char *outp = page;
824         int item_index = 0;
825         int len;
826         int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
827         struct iface_stat *iface_entry;
828         struct rtnl_link_stats64 dev_stats, *stats;
829         struct rtnl_link_stats64 no_dev_stats = {0};
830
831         if (unlikely(module_passive)) {
832                 *eof = 1;
833                 return 0;
834         }
835
836         CT_DEBUG("qtaguid:proc iface_stat_fmt "
837                  "pid=%u tgid=%u uid=%u "
838                  "page=%p *num_items_returned=%p off=%ld "
839                  "char_count=%d *eof=%d\n",
840                  current->pid, current->tgid, current_fsuid(),
841                  page, *num_items_returned,
842                  items_to_skip, char_count, *eof);
843
844         if (*eof)
845                 return 0;
846
847         if (fmt == 2 && item_index++ >= items_to_skip) {
848                 len = pp_iface_stat_line(true, outp, char_count, NULL);
849                 if (len >= char_count) {
850                         *outp = '\0';
851                         return outp - page;
852                 }
853                 outp += len;
854                 char_count -= len;
855                 (*num_items_returned)++;
856         }
857
858         /*
859          * This lock will prevent iface_stat_update() from changing active,
860          * and in turn prevent an interface from unregistering itself.
861          */
862         spin_lock_bh(&iface_stat_list_lock);
863         list_for_each_entry(iface_entry, &iface_stat_list, list) {
864                 if (item_index++ < items_to_skip)
865                         continue;
866
867                 if (iface_entry->active) {
868                         stats = dev_get_stats(iface_entry->net_dev,
869                                               &dev_stats);
870                 } else {
871                         stats = &no_dev_stats;
872                 }
873                 /*
874                  * If the meaning of the data changes, then update the fmtX
875                  * string.
876                  */
877                 if (fmt == 1) {
878                         len = snprintf(
879                                 outp, char_count,
880                                 "%s %d "
881                                 "%llu %llu %llu %llu "
882                                 "%llu %llu %llu %llu\n",
883                                 iface_entry->ifname,
884                                 iface_entry->active,
885                                 iface_entry->totals_via_dev[IFS_RX].bytes,
886                                 iface_entry->totals_via_dev[IFS_RX].packets,
887                                 iface_entry->totals_via_dev[IFS_TX].bytes,
888                                 iface_entry->totals_via_dev[IFS_TX].packets,
889                                 stats->rx_bytes, stats->rx_packets,
890                                 stats->tx_bytes, stats->tx_packets
891                                 );
892                 } else {
893                         len = pp_iface_stat_line(false, outp, char_count,
894                                                  iface_entry);
895                 }
896                 if (len >= char_count) {
897                         spin_unlock_bh(&iface_stat_list_lock);
898                         *outp = '\0';
899                         return outp - page;
900                 }
901                 outp += len;
902                 char_count -= len;
903                 (*num_items_returned)++;
904         }
905         spin_unlock_bh(&iface_stat_list_lock);
906
907         *eof = 1;
908         return outp - page;
909 }
910
911 static void iface_create_proc_worker(struct work_struct *work)
912 {
913         struct proc_dir_entry *proc_entry;
914         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
915                                                    iface_work);
916         struct iface_stat *new_iface  = isw->iface_entry;
917
918         /* iface_entries are not deleted, so safe to manipulate. */
919         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
920         if (IS_ERR_OR_NULL(proc_entry)) {
921                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
922                 kfree(isw);
923                 return;
924         }
925
926         new_iface->proc_ptr = proc_entry;
927
928         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
929                                read_proc_u64,
930                                &new_iface->totals_via_dev[IFS_TX].bytes);
931         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
932                                read_proc_u64,
933                                &new_iface->totals_via_dev[IFS_RX].bytes);
934         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
935                                read_proc_u64,
936                                &new_iface->totals_via_dev[IFS_TX].packets);
937         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
938                                read_proc_u64,
939                                &new_iface->totals_via_dev[IFS_RX].packets);
940         create_proc_read_entry("active", proc_iface_perms, proc_entry,
941                         read_proc_bool, &new_iface->active);
942
943         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
944                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
945         kfree(isw);
946 }
947
948 /*
949  * Will set the entry's active state, and
950  * update the net_dev accordingly also.
951  */
952 static void _iface_stat_set_active(struct iface_stat *entry,
953                                    struct net_device *net_dev,
954                                    bool activate)
955 {
956         if (activate) {
957                 entry->net_dev = net_dev;
958                 entry->active = true;
959                 IF_DEBUG("qtaguid: %s(%s): "
960                          "enable tracking. rfcnt=%d\n", __func__,
961                          entry->ifname,
962                          __this_cpu_read(*net_dev->pcpu_refcnt));
963         } else {
964                 entry->active = false;
965                 entry->net_dev = NULL;
966                 IF_DEBUG("qtaguid: %s(%s): "
967                          "disable tracking. rfcnt=%d\n", __func__,
968                          entry->ifname,
969                          __this_cpu_read(*net_dev->pcpu_refcnt));
970
971         }
972 }
973
974 /* Caller must hold iface_stat_list_lock */
975 static struct iface_stat *iface_alloc(struct net_device *net_dev)
976 {
977         struct iface_stat *new_iface;
978         struct iface_stat_work *isw;
979
980         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
981         if (new_iface == NULL) {
982                 pr_err("qtaguid: iface_stat: create(%s): "
983                        "iface_stat alloc failed\n", net_dev->name);
984                 return NULL;
985         }
986         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
987         if (new_iface->ifname == NULL) {
988                 pr_err("qtaguid: iface_stat: create(%s): "
989                        "ifname alloc failed\n", net_dev->name);
990                 kfree(new_iface);
991                 return NULL;
992         }
993         spin_lock_init(&new_iface->tag_stat_list_lock);
994         new_iface->tag_stat_tree = RB_ROOT;
995         _iface_stat_set_active(new_iface, net_dev, true);
996
997         /*
998          * The ipv6 notifier chains run in atomic context, so we cannot
999          * call create_proc_read_entry() here; defer it to a work item.
1000          */
1001         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
1002         if (!isw) {
1003                 pr_err("qtaguid: iface_stat: create(%s): "
1004                        "work alloc failed\n", new_iface->ifname);
1005                 _iface_stat_set_active(new_iface, net_dev, false);
1006                 kfree(new_iface->ifname);
1007                 kfree(new_iface);
1008                 return NULL;
1009         }
1010         isw->iface_entry = new_iface;
1011         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
1012         schedule_work(&isw->iface_work);
1013         list_add(&new_iface->list, &iface_stat_list);
1014         return new_iface;
1015 }
1016
1017 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
1018                                                struct iface_stat *iface)
1019 {
1020         struct rtnl_link_stats64 dev_stats, *stats;
1021         bool stats_rewound;
1022
1023         stats = dev_get_stats(net_dev, &dev_stats);
1024         /* No empty packets */
1025         stats_rewound =
1026                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
1027                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
1028
1029         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
1030                  "bytes rx/tx=%llu/%llu "
1031                  "active=%d last_known=%d "
1032                  "stats_rewound=%d\n", __func__,
1033                  net_dev ? net_dev->name : "?",
1034                  iface, net_dev,
1035                  stats->rx_bytes, stats->tx_bytes,
1036                  iface->active, iface->last_known_valid, stats_rewound);
1037
1038         if (iface->active && iface->last_known_valid && stats_rewound) {
1039                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
1040                              "iface reset its stats unexpectedly\n", __func__,
1041                              net_dev->name);
1042
1043                 iface->totals_via_dev[IFS_TX].bytes +=
1044                         iface->last_known[IFS_TX].bytes;
1045                 iface->totals_via_dev[IFS_TX].packets +=
1046                         iface->last_known[IFS_TX].packets;
1047                 iface->totals_via_dev[IFS_RX].bytes +=
1048                         iface->last_known[IFS_RX].bytes;
1049                 iface->totals_via_dev[IFS_RX].packets +=
1050                         iface->last_known[IFS_RX].packets;
1051                 iface->last_known_valid = false;
1052                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
1053                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
1054                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
1055                          iface->last_known[IFS_TX].bytes);
1056         }
1057 }
1058
1059 /*
1060  * Create a new entry for tracking the specified interface.
1061  * Do nothing if the entry already exists.
1062  * Called when an interface is configured with a valid IP address.
1063  */
1064 static void iface_stat_create(struct net_device *net_dev,
1065                               struct in_ifaddr *ifa)
1066 {
1067         struct in_device *in_dev = NULL;
1068         const char *ifname;
1069         struct iface_stat *entry;
1070         __be32 ipaddr = 0;
1071         struct iface_stat *new_iface;
1072
1073         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1074                  net_dev ? net_dev->name : "?",
1075                  ifa, net_dev);
1076         if (!net_dev) {
1077                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1078                 return;
1079         }
1080
1081         ifname = net_dev->name;
1082         if (!ifa) {
1083                 in_dev = in_dev_get(net_dev);
1084                 if (!in_dev) {
1085                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1086                                ifname);
1087                         return;
1088                 }
1089                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1090                          ifname, in_dev);
1091                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
1093                                  "ifa=%p ifa_label=%s\n",
1094                                  ifname, ifa,
1095                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
1096                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1097                                 break;
1098                 }
1099         }
1100
1101         if (!ifa) {
1102                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1103                          ifname);
1104                 goto done_put;
1105         }
1106         ipaddr = ifa->ifa_local;
1107
1108         spin_lock_bh(&iface_stat_list_lock);
1109         entry = get_iface_entry(ifname);
1110         if (entry != NULL) {
1111                 bool activate = !ipv4_is_loopback(ipaddr);
1112                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1113                          ifname, entry);
1114                 iface_check_stats_reset_and_adjust(net_dev, entry);
1115                 _iface_stat_set_active(entry, net_dev, activate);
1116                 IF_DEBUG("qtaguid: %s(%s): "
1117                          "tracking now %d on ip=%pI4\n", __func__,
1118                          entry->ifname, activate, &ipaddr);
1119                 goto done_unlock_put;
1120         } else if (ipv4_is_loopback(ipaddr)) {
1121                 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1122                          "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1123                 goto done_unlock_put;
1124         }
1125
1126         new_iface = iface_alloc(net_dev);
1127         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1128                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1129 done_unlock_put:
1130         spin_unlock_bh(&iface_stat_list_lock);
1131 done_put:
1132         if (in_dev)
1133                 in_dev_put(in_dev);
1134 }
1135
1136 static void iface_stat_create_ipv6(struct net_device *net_dev,
1137                                    struct inet6_ifaddr *ifa)
1138 {
1139         struct in_device *in_dev;
1140         const char *ifname;
1141         struct iface_stat *entry;
1142         struct iface_stat *new_iface;
1143         int addr_type;
1144
1145         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1146                  ifa, net_dev, net_dev ? net_dev->name : "");
1147         if (!net_dev) {
1148                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1149                 return;
1150         }
1151         ifname = net_dev->name;
1152
1153         in_dev = in_dev_get(net_dev);
1154         if (!in_dev) {
1155                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1156                        ifname);
1157                 return;
1158         }
1159
1160         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1161                  ifname, in_dev);
1162
1163         if (!ifa) {
1164                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1165                          ifname);
1166                 goto done_put;
1167         }
1168         addr_type = ipv6_addr_type(&ifa->addr);
1169
1170         spin_lock_bh(&iface_stat_list_lock);
1171         entry = get_iface_entry(ifname);
1172         if (entry != NULL) {
1173                 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1174                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1175                          ifname, entry);
1176                 iface_check_stats_reset_and_adjust(net_dev, entry);
1177                 _iface_stat_set_active(entry, net_dev, activate);
1178                 IF_DEBUG("qtaguid: %s(%s): "
1179                          "tracking now %d on ip=%pI6c\n", __func__,
1180                          entry->ifname, activate, &ifa->addr);
1181                 goto done_unlock_put;
1182         } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1183                 IF_DEBUG("qtaguid: %s(%s): "
1184                          "ignore loopback dev. ip=%pI6c\n", __func__,
1185                          ifname, &ifa->addr);
1186                 goto done_unlock_put;
1187         }
1188
1189         new_iface = iface_alloc(net_dev);
1190         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1191                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1192
1193 done_unlock_put:
1194         spin_unlock_bh(&iface_stat_list_lock);
1195 done_put:
1196         in_dev_put(in_dev);
1197 }
1198
1199 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1200 {
1201         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1202         return sock_tag_tree_search(&sock_tag_tree, sk);
1203 }
1204
1205 static struct sock_tag *get_sock_stat(const struct sock *sk)
1206 {
1207         struct sock_tag *sock_tag_entry;
1208         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1209         if (!sk)
1210                 return NULL;
1211         spin_lock_bh(&sock_tag_list_lock);
1212         sock_tag_entry = get_sock_stat_nl(sk);
1213         spin_unlock_bh(&sock_tag_list_lock);
1214         return sock_tag_entry;
1215 }
1216
1217 static int ipx_proto(const struct sk_buff *skb,
1218                      struct xt_action_param *par)
1219 {
1220         int thoff = 0, tproto;
1221
1222         switch (par->family) {
1223         case NFPROTO_IPV6:
1224                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1225                 if (tproto < 0)
1226                         MT_DEBUG("%s(): transport header not found in ipv6"
1227                                  " skb=%p\n", __func__, skb);
1228                 break;
1229         case NFPROTO_IPV4:
1230                 tproto = ip_hdr(skb)->protocol;
1231                 break;
1232         default:
1233                 tproto = IPPROTO_RAW;
1234         }
1235         return tproto;
1236 }
1237
1238 static void
1239 data_counters_update(struct data_counters *dc, int set,
1240                      enum ifs_tx_rx direction, int proto, int bytes)
1241 {
1242         switch (proto) {
1243         case IPPROTO_TCP:
1244                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1245                 break;
1246         case IPPROTO_UDP:
1247                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1248                 break;
1249         case IPPROTO_IP:
1250         default:
1251                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1252                                     1);
1253                 break;
1254         }
1255 }
1256
1257 /*
1258  * Update stats for the specified interface. Do nothing if the entry
1259  * does not exist (when a device was never configured with an IP address).
1260  * Called when a device is being unregistered.
1261  */
1262 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1263 {
1264         struct rtnl_link_stats64 dev_stats, *stats;
1265         struct iface_stat *entry;
1266
1267         stats = dev_get_stats(net_dev, &dev_stats);
1268         spin_lock_bh(&iface_stat_list_lock);
1269         entry = get_iface_entry(net_dev->name);
1270         if (entry == NULL) {
1271                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1272                          net_dev->name);
1273                 spin_unlock_bh(&iface_stat_list_lock);
1274                 return;
1275         }
1276
1277         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1278                  net_dev->name, entry);
1279         if (!entry->active) {
1280                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1281                          net_dev->name);
1282                 spin_unlock_bh(&iface_stat_list_lock);
1283                 return;
1284         }
1285
1286         if (stash_only) {
1287                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1288                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1289                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1290                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1291                 entry->last_known_valid = true;
1292                 IF_DEBUG("qtaguid: %s(%s): "
1293                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1294                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1295                 spin_unlock_bh(&iface_stat_list_lock);
1296                 return;
1297         }
1298         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1299         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1300         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1301         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1302         /* We don't need the last_known[] anymore */
1303         entry->last_known_valid = false;
1304         _iface_stat_set_active(entry, net_dev, false);
1305         IF_DEBUG("qtaguid: %s(%s): "
1306                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1307                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1308         spin_unlock_bh(&iface_stat_list_lock);
1309 }
1310
1311 /*
1312  * Update stats for the specified interface from the skb.
1313  * Do nothing if the entry does not exist
1314  * (when a device was never configured with an IP address).
1315  * Called for each matched skb.
1316  */
1317 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1318                                        struct xt_action_param *par)
1319 {
1320         struct iface_stat *entry;
1321         const struct net_device *el_dev;
1322         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1323         int bytes = skb->len;
1324         int proto;
1325
1326         if (!skb->dev) {
1327                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1328                 el_dev = par->in ? : par->out;
1329         } else {
1330                 const struct net_device *other_dev;
1331                 el_dev = skb->dev;
1332                 other_dev = par->in ? : par->out;
1333                 if (el_dev != other_dev) {
1334                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1335                                  "par->(in/out)=%p %s\n",
1336                                  par->hooknum, el_dev, el_dev->name, other_dev,
1337                                  other_dev->name);
1338                 }
1339         }
1340
1341         if (unlikely(!el_dev)) {
1342                 pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
1343                        par->hooknum, __func__);
1344                 BUG();
1345         } else if (unlikely(!el_dev->name)) {
1346                 pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
1347                        par->hooknum, __func__);
1348                 BUG();
1349         } else {
1350                 proto = ipx_proto(skb, par);
1351                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1352                          par->hooknum, el_dev->name, el_dev->type,
1353                          par->family, proto);
1354         }
1355
1356         spin_lock_bh(&iface_stat_list_lock);
1357         entry = get_iface_entry(el_dev->name);
1358         if (entry == NULL) {
1359                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1360                          __func__, el_dev->name);
1361                 spin_unlock_bh(&iface_stat_list_lock);
1362                 return;
1363         }
1364
1365         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1366                  el_dev->name, entry);
1367
1368         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1369                              bytes);
1370         spin_unlock_bh(&iface_stat_list_lock);
1371 }
1372
1373 static void tag_stat_update(struct tag_stat *tag_entry,
1374                         enum ifs_tx_rx direction, int proto, int bytes)
1375 {
1376         int active_set;
1377         active_set = get_active_counter_set(tag_entry->tn.tag);
1378         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1379                  "dir=%d proto=%d bytes=%d)\n",
1380                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1381                  active_set, direction, proto, bytes);
1382         data_counters_update(&tag_entry->counters, active_set, direction,
1383                              proto, bytes);
1384         if (tag_entry->parent_counters)
1385                 data_counters_update(tag_entry->parent_counters, active_set,
1386                                      direction, proto, bytes);
1387 }
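/*
 * Note: tag_entry->parent_counters, when set, is expected to point at the
 * counters of the matching {0, uid_tag} entry, so updating an
 * {acct_tag, uid_tag} entry also updates the per-uid totals (see
 * if_tag_stat_update() below).
 */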
1388
1389 /*
1390  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1391  * the interface.
1392  * iface_entry->tag_stat_list_lock should be held.
1393  */
1394 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1395                                            tag_t tag)
1396 {
1397         struct tag_stat *new_tag_stat_entry = NULL;
1398         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1399                  " (uid=%u)\n", __func__,
1400                  iface_entry, tag, get_uid_from_tag(tag));
1401         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1402         if (!new_tag_stat_entry) {
1403                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1404                 goto done;
1405         }
1406         new_tag_stat_entry->tn.tag = tag;
1407         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1408 done:
1409         return new_tag_stat_entry;
1410 }
1411
1412 static void if_tag_stat_update(const char *ifname, uid_t uid,
1413                                const struct sock *sk, enum ifs_tx_rx direction,
1414                                int proto, int bytes)
1415 {
1416         struct tag_stat *tag_stat_entry;
1417         tag_t tag, acct_tag;
1418         tag_t uid_tag;
1419         struct data_counters *uid_tag_counters;
1420         struct sock_tag *sock_tag_entry;
1421         struct iface_stat *iface_entry;
1422         struct tag_stat *new_tag_stat = NULL;
1423         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1424                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1425                  ifname, uid, sk, direction, proto, bytes);
1426
1427
1428         iface_entry = get_iface_entry(ifname);
1429         if (!iface_entry) {
1430                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1431                        ifname);
1432                 return;
1433         }
1434         /* It is ok to process data when an iface_entry is inactive */
1435
1436         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1437                  ifname, iface_entry);
1438
1439         /*
1440          * Look for a tagged sock.
1441          * It will have an acct_tag and a uid_tag.
1442          */
1443         sock_tag_entry = get_sock_stat(sk);
1444         if (sock_tag_entry) {
1445                 tag = sock_tag_entry->tag;
1446                 acct_tag = get_atag_from_tag(tag);
1447                 uid_tag = get_utag_from_tag(tag);
1448         } else {
1449                 acct_tag = make_atag_from_value(0);
1450                 tag = combine_atag_with_uid(acct_tag, uid);
1451                 uid_tag = make_tag_from_uid(uid);
1452         }
1453         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1454                  "looking for tag=0x%llx (uid=%u) in ife=%p\n",
1455                  tag, get_uid_from_tag(tag), iface_entry);
1456         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1457         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1458
1459         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1460                                               tag);
1461         if (tag_stat_entry) {
1462                 /*
1463                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1464                  * {0, uid_tag} will also get updated.
1465                  */
1466                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1467                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1468                 return;
1469         }
1470
1471         /* Loop over tag list under this interface for {0,uid_tag} */
1472         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1473                                               uid_tag);
1474         if (!tag_stat_entry) {
1475                 /* Here: the base uid_tag did not exist */
1476                 /*
1477                  * No parent counters. So
1478                  *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1479                  */
1480                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1481                 if (!new_tag_stat)
1482                         goto unlock;
1483                 uid_tag_counters = &new_tag_stat->counters;
1484         } else {
1485                 uid_tag_counters = &tag_stat_entry->counters;
1486         }
1487
1488         if (acct_tag) {
1489                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1490                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1491                 if (!new_tag_stat)
1492                         goto unlock;
1493                 new_tag_stat->parent_counters = uid_tag_counters;
1494         } else {
1495                 /*
1496                  * For new_tag_stat to be still NULL here would require:
1497                  *  {0, uid_tag} exists
1498                  *  and {acct_tag, uid_tag} doesn't exist
1499                  *  AND acct_tag == 0.
1500                  * Impossible. This reassures us that new_tag_stat
1501                  * below will always be assigned.
1502                  */
1503                 BUG_ON(!new_tag_stat);
1504         }
1505         tag_stat_update(new_tag_stat, direction, proto, bytes);
1506 unlock:
1507         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1508 }
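
/*
 * Illustrative walk-through (hypothetical values): a 1400-byte TX packet on
 * "wlan0" from a socket tagged by uid 10005 with a non-zero acct_tag first
 * looks up the exact {acct_tag, uid_tag} entry. If only the {0, uid_tag}
 * entry exists, a child {acct_tag, uid_tag} tag_stat is created with its
 * parent_counters pointing at the {0, uid_tag} counters, so a single
 * tag_stat_update() call bumps both the per-tag stats and the per-uid totals.
 */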
1509
1510 static int iface_netdev_event_handler(struct notifier_block *nb,
1511                                       unsigned long event, void *ptr) {
1512         struct net_device *dev = ptr;
1513
1514         if (unlikely(module_passive))
1515                 return NOTIFY_DONE;
1516
1517         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1518                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1519                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1520
1521         switch (event) {
1522         case NETDEV_UP:
1523                 iface_stat_create(dev, NULL);
1524                 atomic64_inc(&qtu_events.iface_events);
1525                 break;
1526         case NETDEV_DOWN:
1527         case NETDEV_UNREGISTER:
1528                 iface_stat_update(dev, event == NETDEV_DOWN);
1529                 atomic64_inc(&qtu_events.iface_events);
1530                 break;
1531         }
1532         return NOTIFY_DONE;
1533 }
1534
1535 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1536                                          unsigned long event, void *ptr)
1537 {
1538         struct inet6_ifaddr *ifa = ptr;
1539         struct net_device *dev;
1540
1541         if (unlikely(module_passive))
1542                 return NOTIFY_DONE;
1543
1544         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1545                  "ev=0x%lx/%s ifa=%p\n",
1546                  event, netdev_evt_str(event), ifa);
1547
1548         switch (event) {
1549         case NETDEV_UP:
1550                 BUG_ON(!ifa || !ifa->idev);
1551                 dev = (struct net_device *)ifa->idev->dev;
1552                 iface_stat_create_ipv6(dev, ifa);
1553                 atomic64_inc(&qtu_events.iface_events);
1554                 break;
1555         case NETDEV_DOWN:
1556         case NETDEV_UNREGISTER:
1557                 BUG_ON(!ifa || !ifa->idev);
1558                 dev = (struct net_device *)ifa->idev->dev;
1559                 iface_stat_update(dev, event == NETDEV_DOWN);
1560                 atomic64_inc(&qtu_events.iface_events);
1561                 break;
1562         }
1563         return NOTIFY_DONE;
1564 }
1565
1566 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1567                                         unsigned long event, void *ptr)
1568 {
1569         struct in_ifaddr *ifa = ptr;
1570         struct net_device *dev;
1571
1572         if (unlikely(module_passive))
1573                 return NOTIFY_DONE;
1574
1575         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1576                  "ev=0x%lx/%s ifa=%p\n",
1577                  event, netdev_evt_str(event), ifa);
1578
1579         switch (event) {
1580         case NETDEV_UP:
1581                 BUG_ON(!ifa || !ifa->ifa_dev);
1582                 dev = ifa->ifa_dev->dev;
1583                 iface_stat_create(dev, ifa);
1584                 atomic64_inc(&qtu_events.iface_events);
1585                 break;
1586         case NETDEV_DOWN:
1587         case NETDEV_UNREGISTER:
1588                 BUG_ON(!ifa || !ifa->ifa_dev);
1589                 dev = ifa->ifa_dev->dev;
1590                 iface_stat_update(dev, event == NETDEV_DOWN);
1591                 atomic64_inc(&qtu_events.iface_events);
1592                 break;
1593         }
1594         return NOTIFY_DONE;
1595 }
1596
1597 static struct notifier_block iface_netdev_notifier_blk = {
1598         .notifier_call = iface_netdev_event_handler,
1599 };
1600
1601 static struct notifier_block iface_inetaddr_notifier_blk = {
1602         .notifier_call = iface_inetaddr_event_handler,
1603 };
1604
1605 static struct notifier_block iface_inet6addr_notifier_blk = {
1606         .notifier_call = iface_inet6addr_event_handler,
1607 };
1608
1609 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1610 {
1611         int err;
1612
1613         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1614         if (!iface_stat_procdir) {
1615                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1616                 err = -1;
1617                 goto err;
1618         }
1619
1620         iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1621                                                     proc_iface_perms,
1622                                                     parent_procdir);
1623         if (!iface_stat_all_procfile) {
1624                 pr_err("qtaguid: iface_stat: init "
1625                        "failed to create stat_all proc entry\n");
1626                 err = -1;
1627                 goto err_zap_entry;
1628         }
1629         iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
1630         iface_stat_all_procfile->data = (void *)1; /* fmt1 */
1631
1632         iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
1633                                                     proc_iface_perms,
1634                                                     parent_procdir);
1635         if (!iface_stat_fmt_procfile) {
1636                 pr_err("qtaguid: iface_stat: init "
1637                        "failed to create stat_fmt proc entry\n");
1638                 err = -1;
1639                 goto err_zap_all_stats_entry;
1640         }
1641         iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
1642         iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
1643
1644
1645         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1646         if (err) {
1647                 pr_err("qtaguid: iface_stat: init "
1648                        "failed to register dev event handler\n");
1649                 goto err_zap_all_stats_entries;
1650         }
1651         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1652         if (err) {
1653                 pr_err("qtaguid: iface_stat: init "
1654                        "failed to register ipv4 dev event handler\n");
1655                 goto err_unreg_nd;
1656         }
1657
1658         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1659         if (err) {
1660                 pr_err("qtaguid: iface_stat: init "
1661                        "failed to register ipv6 dev event handler\n");
1662                 goto err_unreg_ip4_addr;
1663         }
1664         return 0;
1665
1666 err_unreg_ip4_addr:
1667         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1668 err_unreg_nd:
1669         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1670 err_zap_all_stats_entries:
1671         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1672 err_zap_all_stats_entry:
1673         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1674 err_zap_entry:
1675         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1676 err:
1677         return err;
1678 }
1679
1680 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1681                                     struct xt_action_param *par)
1682 {
1683         struct sock *sk;
1684         unsigned int hook_mask = (1 << par->hooknum);
1685
1686         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1687                  par->hooknum, par->family);
1688
1689         /*
1690          * Let's not abuse the xt_socket_get*_sk(), or else it will
1691          * return garbage SKs.
1692          */
1693         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1694                 return NULL;
1695
1696         switch (par->family) {
1697         case NFPROTO_IPV6:
1698                 sk = xt_socket_get6_sk(skb, par);
1699                 break;
1700         case NFPROTO_IPV4:
1701                 sk = xt_socket_get4_sk(skb, par);
1702                 break;
1703         default:
1704                 return NULL;
1705         }
1706
1707         /*
1708          * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1709          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1710          * Not fixed in 3.0-r3 :(
1711          */
1712         if (sk) {
1713                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1714                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1715                 if (sk->sk_state  == TCP_TIME_WAIT) {
1716                         xt_socket_put_sk(sk);
1717                         sk = NULL;
1718                 }
1719         }
1720         return sk;
1721 }
1722
1723 static void account_for_uid(const struct sk_buff *skb,
1724                             const struct sock *alternate_sk, uid_t uid,
1725                             struct xt_action_param *par)
1726 {
1727         const struct net_device *el_dev;
1728
1729         if (!skb->dev) {
1730                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1731                 el_dev = par->in ? : par->out;
1732         } else {
1733                 const struct net_device *other_dev;
1734                 el_dev = skb->dev;
1735                 other_dev = par->in ? : par->out;
1736                 if (el_dev != other_dev) {
1737                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1738                                 "par->(in/out)=%p %s\n",
1739                                 par->hooknum, el_dev, el_dev->name, other_dev,
1740                                 other_dev->name);
1741                 }
1742         }
1743
1744         if (unlikely(!el_dev)) {
1745                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1746         } else if (unlikely(!el_dev->name)) {
1747                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1748         } else {
1749                 int proto = ipx_proto(skb, par);
1750                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1751                          par->hooknum, el_dev->name, el_dev->type,
1752                          par->family, proto);
1753
1754                 if_tag_stat_update(el_dev->name, uid,
1755                                 skb->sk ? skb->sk : alternate_sk,
1756                                 par->in ? IFS_RX : IFS_TX,
1757                                 proto, skb->len);
1758         }
1759 }
1760
1761 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1762 {
1763         const struct xt_qtaguid_match_info *info = par->matchinfo;
1764         const struct file *filp;
1765         bool got_sock = false;
1766         struct sock *sk;
1767         uid_t sock_uid;
1768         bool res;
1769
1770         if (unlikely(module_passive))
1771                 return (info->match ^ info->invert) == 0;
1772
1773         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1774                  par->hooknum, skb, par->in, par->out, par->family);
1775
1776         atomic64_inc(&qtu_events.match_calls);
1777         if (skb == NULL) {
1778                 res = (info->match ^ info->invert) == 0;
1779                 goto ret_res;
1780         }
1781
1782         switch (par->hooknum) {
1783         case NF_INET_PRE_ROUTING:
1784         case NF_INET_POST_ROUTING:
1785                 atomic64_inc(&qtu_events.match_calls_prepost);
1786                 iface_stat_update_from_skb(skb, par);
1787                 /*
1788                  * We are done in pre/post. The skb will get processed
1789                  * further later.
1790                  */
1791                 res = (info->match ^ info->invert);
1792                 goto ret_res;
1793                 break;
1794         /* default: Fall through and do UID related work */
1795         }
1796
1797         sk = skb->sk;
1798         if (sk == NULL) {
1799                 /*
1800                  * A missing sk->sk_socket happens when packets are in-flight
1801                  * and the matching socket is already closed and gone.
1802                  */
1803                 sk = qtaguid_find_sk(skb, par);
1804                 /*
1805                  * If we got the socket from the find_sk(), we will need to put
1806                  * it back, as nf_tproxy_get_sock_v4() got it.
1807                  */
1808                 got_sock = sk;
1809                 if (sk)
1810                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1811                 else
1812                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1813         } else {
1814                 atomic64_inc(&qtu_events.match_found_sk);
1815         }
1816         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1817                  par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1818         if (sk != NULL) {
1819                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1820                         par->hooknum, sk, sk->sk_socket,
1821                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1822                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1823                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1824                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1825         }
1826
1827         if (sk == NULL || sk->sk_socket == NULL) {
1828                 /*
1829                  * Here, the qtaguid_find_sk() using connection tracking
1830                  * couldn't find the owner, so for now we just count them
1831                  * against the system.
1832                  */
1833                 /*
1834                  * TODO: unhack how to force just accounting.
1835                  * For now we only do iface stats when the uid-owner is not
1836                  * requested.
1837                  */
1838                 if (!(info->match & XT_QTAGUID_UID))
1839                         account_for_uid(skb, sk, 0, par);
1840                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1841                         par->hooknum,
1842                         sk ? sk->sk_socket : NULL);
1843                 res = (info->match ^ info->invert) == 0;
1844                 atomic64_inc(&qtu_events.match_no_sk);
1845                 goto put_sock_ret_res;
1846         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1847                 res = false;
1848                 goto put_sock_ret_res;
1849         }
1850         filp = sk->sk_socket->file;
1851         if (filp == NULL) {
1852                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1853                 account_for_uid(skb, sk, 0, par);
1854                 res = ((info->match ^ info->invert) &
1855                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1856                 atomic64_inc(&qtu_events.match_no_sk_file);
1857                 goto put_sock_ret_res;
1858         }
1859         sock_uid = filp->f_cred->fsuid;
1860         /*
1861          * TODO: unhack how to force just accounting.
1862          * For now we only do iface stats when the uid-owner is not requested
1863          */
1864         if (!(info->match & XT_QTAGUID_UID))
1865                 account_for_uid(skb, sk, sock_uid, par);
1866
1867         /*
1868          * The following two tests fail the match when:
1869          *    id not in range AND no inverted condition requested
1870          * or id     in range AND    inverted condition requested
1871          * Thus (!a && b) || (a && !b) == a ^ b
1872          */
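        /*
         * For example (hypothetical rule matching uids 1000-1999, no invert):
         * fsuid=1500 is in range, so in_range ^ !inverted = 1 ^ 1 = 0 and the
         * test below does not fire; fsuid=0 gives 0 ^ 1 = 1 and fails the
         * match.
         */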
1873         if (info->match & XT_QTAGUID_UID)
1874                 if ((filp->f_cred->fsuid >= info->uid_min &&
1875                      filp->f_cred->fsuid <= info->uid_max) ^
1876                     !(info->invert & XT_QTAGUID_UID)) {
1877                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1878                                  par->hooknum);
1879                         res = false;
1880                         goto put_sock_ret_res;
1881                 }
1882         if (info->match & XT_QTAGUID_GID)
1883                 if ((filp->f_cred->fsgid >= info->gid_min &&
1884                                 filp->f_cred->fsgid <= info->gid_max) ^
1885                         !(info->invert & XT_QTAGUID_GID)) {
1886                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1887                                 par->hooknum);
1888                         res = false;
1889                         goto put_sock_ret_res;
1890                 }
1891
1892         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1893         res = true;
1894
1895 put_sock_ret_res:
1896         if (got_sock)
1897                 xt_socket_put_sk(sk);
1898 ret_res:
1899         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1900         return res;
1901 }
1902
1903 #ifdef DDEBUG
1904 /* This function is not in xt_qtaguid_print.c because of locks visibility */
1905 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1906 {
1907         va_list args;
1908         char *fmt_buff;
1909         char *buff;
1910
1911         if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1912                 return;
1913
1914         fmt_buff = kasprintf(GFP_ATOMIC,
1915                              "qtaguid: %s(): %s {\n", __func__, fmt);
1916         BUG_ON(!fmt_buff);
1917         va_start(args, fmt);
1918         buff = kvasprintf(GFP_ATOMIC,
1919                           fmt_buff, args);
1920         BUG_ON(!buff);
1921         pr_debug("%s", buff);
1922         kfree(fmt_buff);
1923         kfree(buff);
1924         va_end(args);
1925
1926         spin_lock_bh(&sock_tag_list_lock);
1927         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1928         spin_unlock_bh(&sock_tag_list_lock);
1929
1930         spin_lock_bh(&sock_tag_list_lock);
1931         spin_lock_bh(&uid_tag_data_tree_lock);
1932         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1933         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1934         spin_unlock_bh(&uid_tag_data_tree_lock);
1935         spin_unlock_bh(&sock_tag_list_lock);
1936
1937         spin_lock_bh(&iface_stat_list_lock);
1938         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1939         spin_unlock_bh(&iface_stat_list_lock);
1940
1941         pr_debug("qtaguid: %s(): }\n", __func__);
1942 }
1943 #else
1944 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1945 #endif
1946
1947 /*
1948  * Procfs reader to get all active socket tags using style "1)" as described in
1949  * fs/proc/generic.c
1950  */
1951 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1952                                   off_t items_to_skip, int char_count, int *eof,
1953                                   void *data)
1954 {
1955         char *outp = page;
1956         int len;
1957         uid_t uid;
1958         struct rb_node *node;
1959         struct sock_tag *sock_tag_entry;
1960         int item_index = 0;
1961         int indent_level = 0;
1962         long f_count;
1963
1964         if (unlikely(module_passive)) {
1965                 *eof = 1;
1966                 return 0;
1967         }
1968
1969         if (*eof)
1970                 return 0;
1971
1972         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
1973                  "page=%p off=%ld char_count=%d *eof=%d\n",
1974                  current->pid, current->tgid, current_fsuid(),
1975                  page, items_to_skip, char_count, *eof);
1976
1977         spin_lock_bh(&sock_tag_list_lock);
1978         for (node = rb_first(&sock_tag_tree);
1979              node;
1980              node = rb_next(node)) {
1981                 if (item_index++ < items_to_skip)
1982                         continue;
1983                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1984                 uid = get_uid_from_tag(sock_tag_entry->tag);
1985                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1986                          "pid=%u\n",
1987                          sock_tag_entry->sk,
1988                          sock_tag_entry->tag,
1989                          uid,
1990                          sock_tag_entry->pid
1991                         );
1992                 f_count = atomic_long_read(
1993                         &sock_tag_entry->socket->file->f_count);
1994                 len = snprintf(outp, char_count,
1995                                "sock=%p tag=0x%llx (uid=%u) pid=%u "
1996                                "f_count=%lu\n",
1997                                sock_tag_entry->sk,
1998                                sock_tag_entry->tag, uid,
1999                                sock_tag_entry->pid, f_count);
2000                 if (len >= char_count) {
2001                         spin_unlock_bh(&sock_tag_list_lock);
2002                         *outp = '\0';
2003                         return outp - page;
2004                 }
2005                 outp += len;
2006                 char_count -= len;
2007                 (*num_items_returned)++;
2008         }
2009         spin_unlock_bh(&sock_tag_list_lock);
2010
2011         if (item_index++ >= items_to_skip) {
2012                 len = snprintf(outp, char_count,
2013                                "events: sockets_tagged=%llu "
2014                                "sockets_untagged=%llu "
2015                                "counter_set_changes=%llu "
2016                                "delete_cmds=%llu "
2017                                "iface_events=%llu "
2018                                "match_calls=%llu "
2019                                "match_calls_prepost=%llu "
2020                                "match_found_sk=%llu "
2021                                "match_found_sk_in_ct=%llu "
2022                                "match_found_no_sk_in_ct=%llu "
2023                                "match_no_sk=%llu "
2024                                "match_no_sk_file=%llu\n",
2025                                atomic64_read(&qtu_events.sockets_tagged),
2026                                atomic64_read(&qtu_events.sockets_untagged),
2027                                atomic64_read(&qtu_events.counter_set_changes),
2028                                atomic64_read(&qtu_events.delete_cmds),
2029                                atomic64_read(&qtu_events.iface_events),
2030                                atomic64_read(&qtu_events.match_calls),
2031                                atomic64_read(&qtu_events.match_calls_prepost),
2032                                atomic64_read(&qtu_events.match_found_sk),
2033                                atomic64_read(&qtu_events.match_found_sk_in_ct),
2034                                atomic64_read(
2035                                        &qtu_events.match_found_no_sk_in_ct),
2036                                atomic64_read(&qtu_events.match_no_sk),
2037                                atomic64_read(&qtu_events.match_no_sk_file));
2038                 if (len >= char_count) {
2039                         *outp = '\0';
2040                         return outp - page;
2041                 }
2042                 outp += len;
2043                 char_count -= len;
2044                 (*num_items_returned)++;
2045         }
2046
2047         /* Count the following as part of the last item_index */
2048         if (item_index > items_to_skip) {
2049                 prdebug_full_state(indent_level, "proc ctrl");
2050         }
2051
2052         *eof = 1;
2053         return outp - page;
2054 }
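
/*
 * Example of the resulting read output (hypothetical values): one line per
 * tagged socket, followed by the event counters, e.g.
 *   sock=ffff880012345678 tag=0xb0000274d (uid=10061) pid=1234 f_count=3
 *   events: sockets_tagged=5 sockets_untagged=2 counter_set_changes=1 ...
 */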
2055
2056 /*
2057  * Delete socket tags, and stat tags associated with a given
2058  * accounting tag and uid.
2059  */
2060 static int ctrl_cmd_delete(const char *input)
2061 {
2062         char cmd;
2063         uid_t uid;
2064         uid_t entry_uid;
2065         tag_t acct_tag;
2066         tag_t tag;
2067         int res, argc;
2068         struct iface_stat *iface_entry;
2069         struct rb_node *node;
2070         struct sock_tag *st_entry;
2071         struct rb_root st_to_free_tree = RB_ROOT;
2072         struct tag_stat *ts_entry;
2073         struct tag_counter_set *tcs_entry;
2074         struct tag_ref *tr_entry;
2075         struct uid_tag_data *utd_entry;
2076
2077         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
2078         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2079                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2080                  acct_tag, uid);
2081         if (argc < 2) {
2082                 res = -EINVAL;
2083                 goto err;
2084         }
2085         if (!valid_atag(acct_tag)) {
2086                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2087                 res = -EINVAL;
2088                 goto err;
2089         }
2090         if (argc < 3) {
2091                 uid = current_fsuid();
2092         } else if (!can_impersonate_uid(uid)) {
2093                 pr_info("qtaguid: ctrl_delete(%s): "
2094                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2095                         input, current->pid, current->tgid, current_fsuid());
2096                 res = -EPERM;
2097                 goto err;
2098         }
2099
2100         tag = combine_atag_with_uid(acct_tag, uid);
2101         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2102                  "looking for tag=0x%llx (uid=%u)\n",
2103                  input, tag, uid);
2104
2105         /* Delete socket tags */
2106         spin_lock_bh(&sock_tag_list_lock);
2107         node = rb_first(&sock_tag_tree);
2108         while (node) {
2109                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2110                 entry_uid = get_uid_from_tag(st_entry->tag);
2111                 node = rb_next(node);
2112                 if (entry_uid != uid)
2113                         continue;
2114
2115                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2116                          input, st_entry->tag, entry_uid);
2117
2118                 if (!acct_tag || st_entry->tag == tag) {
2119                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2120                         /* Can't sockfd_put() within spinlock, do it later. */
2121                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2122                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2123                         BUG_ON(tr_entry->num_sock_tags <= 0);
2124                         tr_entry->num_sock_tags--;
2125                         /*
2126                          * TODO: remove this if, and start failing.
2127                          * This is a hack to work around the fact that in some
2128                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2129                          * to tolerate apps
2130                          * that didn't open /dev/xt_qtaguid.
2131                          */
2132                         if (st_entry->list.next && st_entry->list.prev)
2133                                 list_del(&st_entry->list);
2134                 }
2135         }
2136         spin_unlock_bh(&sock_tag_list_lock);
2137
2138         sock_tag_tree_erase(&st_to_free_tree);
2139
2140         /* Delete tag counter-sets */
2141         spin_lock_bh(&tag_counter_set_list_lock);
2142         /* Counter sets are only on the uid tag, not full tag */
2143         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2144         if (tcs_entry) {
2145                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2146                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2147                          input,
2148                          tcs_entry->tn.tag,
2149                          get_uid_from_tag(tcs_entry->tn.tag),
2150                          tcs_entry->active_set);
2151                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2152                 kfree(tcs_entry);
2153         }
2154         spin_unlock_bh(&tag_counter_set_list_lock);
2155
2156         /*
2157          * If acct_tag is 0, then all entries belonging to uid are
2158          * erased.
2159          */
2160         spin_lock_bh(&iface_stat_list_lock);
2161         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2162                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2163                 node = rb_first(&iface_entry->tag_stat_tree);
2164                 while (node) {
2165                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2166                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2167                         node = rb_next(node);
2168
2169                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2170                                  "ts tag=0x%llx (uid=%u)\n",
2171                                  input, ts_entry->tn.tag, entry_uid);
2172
2173                         if (entry_uid != uid)
2174                                 continue;
2175                         if (!acct_tag || ts_entry->tn.tag == tag) {
2176                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2177                                          "erase ts: %s 0x%llx %u\n",
2178                                          input, iface_entry->ifname,
2179                                          get_atag_from_tag(ts_entry->tn.tag),
2180                                          entry_uid);
2181                                 rb_erase(&ts_entry->tn.node,
2182                                          &iface_entry->tag_stat_tree);
2183                                 kfree(ts_entry);
2184                         }
2185                 }
2186                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2187         }
2188         spin_unlock_bh(&iface_stat_list_lock);
2189
2190         /* Cleanup the uid_tag_data */
2191         spin_lock_bh(&uid_tag_data_tree_lock);
2192         node = rb_first(&uid_tag_data_tree);
2193         while (node) {
2194                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2195                 entry_uid = utd_entry->uid;
2196                 node = rb_next(node);
2197
2198                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2199                          "utd uid=%u\n",
2200                          input, entry_uid);
2201
2202                 if (entry_uid != uid)
2203                         continue;
2204                 /*
2205                  * Go over the tag_refs and free the ones that
2206                  * don't have any sock_tags using them.
2207                  */
2208                 put_tag_ref_tree(tag, utd_entry);
2209                 put_utd_entry(utd_entry);
2210         }
2211         spin_unlock_bh(&uid_tag_data_tree_lock);
2212
2213         atomic64_inc(&qtu_events.delete_cmds);
2214         res = 0;
2215
2216 err:
2217         return res;
2218 }
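
/*
 * Example delete commands, as written to the ctrl file (hypothetical values,
 * and assuming acct_tag values are already shifted into the upper 32 bits as
 * valid_atag() expects): "d 0 10005" drops every tag belonging to uid 10005,
 * while "d 47244640256 10005" (i.e. acct_tag 11 << 32) only drops the
 * {acct_tag=11, uid=10005} entries. With no uid argument, the caller's own
 * fsuid is used.
 */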
2219
2220 static int ctrl_cmd_counter_set(const char *input)
2221 {
2222         char cmd;
2223         uid_t uid = 0;
2224         tag_t tag;
2225         int res, argc;
2226         struct tag_counter_set *tcs;
2227         int counter_set;
2228
2229         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2230         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2231                  "set=%d uid=%u\n", input, argc, cmd,
2232                  counter_set, uid);
2233         if (argc != 3) {
2234                 res = -EINVAL;
2235                 goto err;
2236         }
2237         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2238                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2239                         input);
2240                 res = -EINVAL;
2241                 goto err;
2242         }
2243         if (!can_manipulate_uids()) {
2244                 pr_info("qtaguid: ctrl_counterset(%s): "
2245                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2246                         input, current->pid, current->tgid, current_fsuid());
2247                 res = -EPERM;
2248                 goto err;
2249         }
2250
2251         tag = make_tag_from_uid(uid);
2252         spin_lock_bh(&tag_counter_set_list_lock);
2253         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2254         if (!tcs) {
2255                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2256                 if (!tcs) {
2257                         spin_unlock_bh(&tag_counter_set_list_lock);
2258                         pr_err("qtaguid: ctrl_counterset(%s): "
2259                                "failed to alloc counter set\n",
2260                                input);
2261                         res = -ENOMEM;
2262                         goto err;
2263                 }
2264                 tcs->tn.tag = tag;
2265                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2266                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2267                          "(uid=%u) set=%d\n",
2268                          input, tag, get_uid_from_tag(tag), counter_set);
2269         }
2270         tcs->active_set = counter_set;
2271         spin_unlock_bh(&tag_counter_set_list_lock);
2272         atomic64_inc(&qtu_events.counter_set_changes);
2273         res = 0;
2274
2275 err:
2276         return res;
2277 }
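
/*
 * Example counter-set command (hypothetical values): "s 1 10005" makes
 * counter set 1 the active set for uid 10005, so traffic for that uid is
 * accumulated in set 1 from then on (e.g. to keep foreground and background
 * usage in separate sets). The set must be within [0, IFS_MAX_COUNTER_SETS).
 */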
2278
2279 static int ctrl_cmd_tag(const char *input)
2280 {
2281         char cmd;
2282         int sock_fd = 0;
2283         uid_t uid = 0;
2284         tag_t acct_tag = make_atag_from_value(0);
2285         tag_t full_tag;
2286         struct socket *el_socket;
2287         int res, argc;
2288         struct sock_tag *sock_tag_entry;
2289         struct tag_ref *tag_ref_entry;
2290         struct uid_tag_data *uid_tag_data_entry;
2291         struct proc_qtu_data *pqd_entry;
2292
2293         /* Unassigned args will get defaulted later. */
2294         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2295         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2296                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2297                  acct_tag, uid);
2298         if (argc < 2) {
2299                 res = -EINVAL;
2300                 goto err;
2301         }
2302         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2303         if (!el_socket) {
2304                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2305                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2306                         input, sock_fd, res, current->pid, current->tgid,
2307                         current_fsuid());
2308                 goto err;
2309         }
2310         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2311                  input, atomic_long_read(&el_socket->file->f_count),
2312                  el_socket->sk);
2313         if (argc < 3) {
2314                 acct_tag = make_atag_from_value(0);
2315         } else if (!valid_atag(acct_tag)) {
2316                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2317                 res = -EINVAL;
2318                 goto err_put;
2319         }
2320         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2321                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2322                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2323                  input, current->pid, current->tgid, current_uid(),
2324                  current_euid(), current_fsuid(),
2325                  xt_qtaguid_ctrl_file->gid,
2326                  in_group_p(xt_qtaguid_ctrl_file->gid),
2327                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2328         if (argc < 4) {
2329                 uid = current_fsuid();
2330         } else if (!can_impersonate_uid(uid)) {
2331                 pr_info("qtaguid: ctrl_tag(%s): "
2332                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2333                         input, current->pid, current->tgid, current_fsuid());
2334                 res = -EPERM;
2335                 goto err_put;
2336         }
2337         full_tag = combine_atag_with_uid(acct_tag, uid);
2338
2339         spin_lock_bh(&sock_tag_list_lock);
2340         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2341         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2342         if (IS_ERR(tag_ref_entry)) {
2343                 res = PTR_ERR(tag_ref_entry);
2344                 spin_unlock_bh(&sock_tag_list_lock);
2345                 goto err_put;
2346         }
2347         tag_ref_entry->num_sock_tags++;
2348         if (sock_tag_entry) {
2349                 struct tag_ref *prev_tag_ref_entry;
2350
2351                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2352                          "st@%p ...->f_count=%ld\n",
2353                          input, el_socket->sk, sock_tag_entry,
2354                          atomic_long_read(&el_socket->file->f_count));
2355                 /*
2356                  * This is a re-tagging, so release the sock_fd that was
2357                  * locked at the time of the 1st tagging.
2358                  * There is still the ref from this call's sockfd_lookup() so
2359                  * it can be done within the spinlock.
2360                  */
2361                 sockfd_put(sock_tag_entry->socket);
2362                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2363                                                     &uid_tag_data_entry);
2364                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2365                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2366                 prev_tag_ref_entry->num_sock_tags--;
2367                 sock_tag_entry->tag = full_tag;
2368         } else {
2369                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2370                          input, el_socket->sk);
2371                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2372                                          GFP_ATOMIC);
2373                 if (!sock_tag_entry) {
2374                         pr_err("qtaguid: ctrl_tag(%s): "
2375                                "socket tag alloc failed\n",
2376                                input);
2377                         spin_unlock_bh(&sock_tag_list_lock);
2378                         res = -ENOMEM;
2379                         goto err_tag_unref_put;
2380                 }
2381                 sock_tag_entry->sk = el_socket->sk;
2382                 sock_tag_entry->socket = el_socket;
2383                 sock_tag_entry->pid = current->tgid;
2384                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2385                                                             uid);
2386                 spin_lock_bh(&uid_tag_data_tree_lock);
2387                 pqd_entry = proc_qtu_data_tree_search(
2388                         &proc_qtu_data_tree, current->tgid);
2389                 /*
2390                  * TODO: remove if, and start failing.
2391                  * At first, we want to catch user-space code that is not
2392                  * opening the /dev/xt_qtaguid.
2393                  */
2394                 if (IS_ERR_OR_NULL(pqd_entry))
2395                         pr_warn_once(
2396                                 "qtaguid: %s(): "
2397                                 "User space forgot to open /dev/xt_qtaguid? "
2398                                 "pid=%u tgid=%u uid=%u\n", __func__,
2399                                 current->pid, current->tgid,
2400                                 current_fsuid());
2401                 else
2402                         list_add(&sock_tag_entry->list,
2403                                  &pqd_entry->sock_tag_list);
2404                 spin_unlock_bh(&uid_tag_data_tree_lock);
2405
2406                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2407                 atomic64_inc(&qtu_events.sockets_tagged);
2408         }
2409         spin_unlock_bh(&sock_tag_list_lock);
2410         /* We keep the ref to the socket (file) until it is untagged */
2411         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2412                  input, sock_tag_entry,
2413                  atomic_long_read(&el_socket->file->f_count));
2414         return 0;
2415
2416 err_tag_unref_put:
2417         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2418         tag_ref_entry->num_sock_tags--;
2419         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2420 err_put:
2421         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2422                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2423         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2424         sockfd_put(el_socket);
2425         return res;
2426
2427 err:
2428         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2429         return res;
2430 }
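
/*
 * Example tag commands (hypothetical values): "t 12 47244640256 10005" tags
 * the socket behind fd 12 with acct_tag 11 (shifted into the upper 32 bits)
 * on behalf of uid 10005. A bare "t 12" tags the socket with acct_tag 0 and
 * the caller's own fsuid.
 */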
2431
2432 static int ctrl_cmd_untag(const char *input)
2433 {
2434         char cmd;
2435         int sock_fd = 0;
2436         struct socket *el_socket;
2437         int res, argc;
2438         struct sock_tag *sock_tag_entry;
2439         struct tag_ref *tag_ref_entry;
2440         struct uid_tag_data *utd_entry;
2441         struct proc_qtu_data *pqd_entry;
2442
2443         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2444         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2445                  input, argc, cmd, sock_fd);
2446         if (argc < 2) {
2447                 res = -EINVAL;
2448                 goto err;
2449         }
2450         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2451         if (!el_socket) {
2452                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2453                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2454                         input, sock_fd, res, current->pid, current->tgid,
2455                         current_fsuid());
2456                 goto err;
2457         }
2458         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2459                  input, atomic_long_read(&el_socket->file->f_count),
2460                  el_socket->sk);
2461         spin_lock_bh(&sock_tag_list_lock);
2462         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2463         if (!sock_tag_entry) {
2464                 spin_unlock_bh(&sock_tag_list_lock);
2465                 res = -EINVAL;
2466                 goto err_put;
2467         }
2468         /*
2469          * The socket already belongs to the current process
2470          * so it can do whatever it wants to it.
2471          */
2472         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2473
2474         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2475         BUG_ON(!tag_ref_entry);
2476         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2477         spin_lock_bh(&uid_tag_data_tree_lock);
2478         pqd_entry = proc_qtu_data_tree_search(
2479                 &proc_qtu_data_tree, current->tgid);
2480         /*
2481          * TODO: remove if, and start failing.
2482          * At first, we want to catch user-space code that is not
2483          * opening the /dev/xt_qtaguid.
2484          */
2485         if (IS_ERR_OR_NULL(pqd_entry))
2486                 pr_warn_once("qtaguid: %s(): "
2487                              "User space forgot to open /dev/xt_qtaguid? "
2488                              "pid=%u tgid=%u uid=%u\n", __func__,
2489                              current->pid, current->tgid, current_fsuid());
2490         else
2491                 list_del(&sock_tag_entry->list);
2492         spin_unlock_bh(&uid_tag_data_tree_lock);
2493         /*
2494          * We don't free tag_ref from the utd_entry here,
2495          * only during a cmd_delete().
2496          */
2497         tag_ref_entry->num_sock_tags--;
2498         spin_unlock_bh(&sock_tag_list_lock);
2499         /*
2500          * Release the sock_fd that was grabbed at tag time,
2501          * and once more for the sockfd_lookup() here.
2502          */
2503         sockfd_put(sock_tag_entry->socket);
2504         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2505                  input, sock_tag_entry,
2506                  atomic_long_read(&el_socket->file->f_count) - 1);
2507         sockfd_put(el_socket);
2508
2509         kfree(sock_tag_entry);
2510         atomic64_inc(&qtu_events.sockets_untagged);
2511
2512         return 0;
2513
2514 err_put:
2515         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2516                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2517         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2518         sockfd_put(el_socket);
2519         return res;
2520
2521 err:
2522         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2523         return res;
2524 }
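
/*
 * Example untag command (hypothetical fd): "u 12" removes whatever tag is
 * attached to the socket behind fd 12 and drops the file reference that was
 * taken when the socket was tagged.
 */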
2525
2526 static int qtaguid_ctrl_parse(const char *input, int count)
2527 {
2528         char cmd;
2529         int res;
2530
2531         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2532                  input, current->pid, current->tgid, current_fsuid());
2533
2534         cmd = input[0];
2535         /* Collect params for commands */
2536         switch (cmd) {
2537         case 'd':
2538                 res = ctrl_cmd_delete(input);
2539                 break;
2540
2541         case 's':
2542                 res = ctrl_cmd_counter_set(input);
2543                 break;
2544
2545         case 't':
2546                 res = ctrl_cmd_tag(input);
2547                 break;
2548
2549         case 'u':
2550                 res = ctrl_cmd_untag(input);
2551                 break;
2552
2553         default:
2554                 res = -EINVAL;
2555                 goto err;
2556         }
2557         if (!res)
2558                 res = count;
2559 err:
2560         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2561         return res;
2562 }
2563
2564 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2565 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2566                         unsigned long count, void *data)
2567 {
2568         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2569
2570         if (unlikely(module_passive))
2571                 return count;
2572
2573         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2574                 return -EINVAL;
2575
2576         if (copy_from_user(input_buf, buffer, count))
2577                 return -EFAULT;
2578
2579         input_buf[count] = '\0';
2580         return qtaguid_ctrl_parse(input_buf, count);
2581 }
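
/*
 * Usage sketch, assuming the ctrl file sits at the usual
 * /proc/net/xt_qtaguid/ctrl location:
 *   echo "t 12 47244640256 10005" > /proc/net/xt_qtaguid/ctrl
 * Writes of MAX_QTAGUID_CTRL_INPUT_LEN bytes or more are rejected with
 * -EINVAL before any parsing happens.
 */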
2582
2583 struct proc_print_info {
2584         char *outp;
2585         char **num_items_returned;
2586         struct iface_stat *iface_entry;
2587         struct tag_stat *ts_entry;
2588         int item_index;
2589         int items_to_skip;
2590         int char_count;
2591 };
2592
2593 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2594 {
2595         int len;
2596         struct data_counters *cnts;
2597
2598         if (!ppi->item_index) {
2599                 if (ppi->item_index++ < ppi->items_to_skip)
2600                         return 0;
2601                 len = snprintf(ppi->outp, ppi->char_count,
2602                                "idx iface acct_tag_hex uid_tag_int cnt_set "
2603                                "rx_bytes rx_packets "
2604                                "tx_bytes tx_packets "
2605                                "rx_tcp_bytes rx_tcp_packets "
2606                                "rx_udp_bytes rx_udp_packets "
2607                                "rx_other_bytes rx_other_packets "
2608                                "tx_tcp_bytes tx_tcp_packets "
2609                                "tx_udp_bytes tx_udp_packets "
2610                                "tx_other_bytes tx_other_packets\n");
2611         } else {
2612                 tag_t tag = ppi->ts_entry->tn.tag;
2613                 uid_t stat_uid = get_uid_from_tag(tag);
2614                 /* Detailed tags are not available to everybody */
2615                 if (get_atag_from_tag(tag)
2616                     && !can_read_other_uid_stats(stat_uid)) {
2617                         CT_DEBUG("qtaguid: stats line: "
2618                                  "%s 0x%llx %u: insufficient priv "
2619                                  "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2620                                  ppi->iface_entry->ifname,
2621                                  get_atag_from_tag(tag), stat_uid,
2622                                  current->pid, current->tgid, current_fsuid(),
2623                                  xt_qtaguid_stats_file->gid);
2624                         return 0;
2625                 }
2626                 if (ppi->item_index++ < ppi->items_to_skip)
2627                         return 0;
2628                 cnts = &ppi->ts_entry->counters;
2629                 len = snprintf(
2630                         ppi->outp, ppi->char_count,
2631                         "%d %s 0x%llx %u %u "
2632                         "%llu %llu "
2633                         "%llu %llu "
2634                         "%llu %llu "
2635                         "%llu %llu "
2636                         "%llu %llu "
2637                         "%llu %llu "
2638                         "%llu %llu "
2639                         "%llu %llu\n",
2640                         ppi->item_index,
2641                         ppi->iface_entry->ifname,
2642                         get_atag_from_tag(tag),
2643                         stat_uid,
2644                         cnt_set,
2645                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
2646                         dc_sum_packets(cnts, cnt_set, IFS_RX),
2647                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
2648                         dc_sum_packets(cnts, cnt_set, IFS_TX),
2649                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2650                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2651                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2652                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2653                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2654                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2655                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2656                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2657                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2658                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2659                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2660                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2661         }
2662         return len;
2663 }
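
/*
 * Example stats line matching the header above (hypothetical values):
 *   2 wlan0 0xb00000000 10061 0 80903 342 65328 312 ...
 * i.e. item index 2, iface wlan0, acct_tag 0xb (printed shifted into the
 * upper 32 bits), uid 10061, counter set 0, followed by the rx/tx byte and
 * packet totals and the per-protocol (tcp/udp/other) breakdowns.
 */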
2664
2665 static bool pp_sets(struct proc_print_info *ppi)
2666 {
2667         int len;
2668         int counter_set;
2669         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2670              counter_set++) {
2671                 len = pp_stats_line(ppi, counter_set);
2672                 if (len >= ppi->char_count) {
2673                         *ppi->outp = '\0';
2674                         return false;
2675                 }
2676                 if (len) {
2677                         ppi->outp += len;
2678                         ppi->char_count -= len;
2679                         (*ppi->num_items_returned)++;
2680                 }
2681         }
2682         return true;
2683 }
2684
2685 /*
2686  * Procfs reader to get all tag stats using style "1)" as described in
2687  * fs/proc/generic.c
2688  * Groups all protocols tx/rx bytes.
2689  */
2690 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2691                                 off_t items_to_skip, int char_count, int *eof,
2692                                 void *data)
2693 {
2694         struct proc_print_info ppi;
2695         int len;
2696
2697         ppi.outp = page;
2698         ppi.item_index = 0;
2699         ppi.char_count = char_count;
2700         ppi.num_items_returned = num_items_returned;
2701         ppi.items_to_skip = items_to_skip;
2702
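        /*
         * In passive mode only the header line is emitted, so readers still
         * get a well-formed but otherwise empty stats file.
         */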
2703         if (unlikely(module_passive)) {
2704                 len = pp_stats_line(&ppi, 0);
2705                 /* The header should always be shorter than the buffer. */
2706                 BUG_ON(len >= ppi.char_count);
2707                 (*num_items_returned)++;
2708                 *eof = 1;
2709                 return len;
2710         }
2711
2712         CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
2713                  "page=%p *num_items_returned=%p off=%ld "
2714                  "char_count=%d *eof=%d\n",
2715                  current->pid, current->tgid, current_fsuid(),
2716                  page, *num_items_returned,
2717                  items_to_skip, char_count, *eof);
2718
2719         if (*eof)
2720                 return 0;
2721
2722         /* The idx is there to help debug when things go belly up. */
2723         len = pp_stats_line(&ppi, 0);
2724         /* Don't advance the outp unless the whole line was printed */
2725         if (len >= ppi.char_count) {
2726                 *ppi.outp = '\0';
2727                 return ppi.outp - page;
2728         }
2729         if (len) {
2730                 ppi.outp += len;
2731                 ppi.char_count -= len;
2732                 (*num_items_returned)++;
2733         }
2734
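        /*
         * Walk every tracked interface and, within each one, every tag_stat
         * node in its rb-tree, emitting one line per counter set.  Stop
         * early (without setting *eof) once the caller's buffer fills up.
         */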
2735         spin_lock_bh(&iface_stat_list_lock);
2736         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2737                 struct rb_node *node;
2738                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2739                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2740                      node;
2741                      node = rb_next(node)) {
2742                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2743                         if (!pp_sets(&ppi)) {
2744                                 spin_unlock_bh(
2745                                         &ppi.iface_entry->tag_stat_list_lock);
2746                                 spin_unlock_bh(&iface_stat_list_lock);
2747                                 return ppi.outp - page;
2748                         }
2749                 }
2750                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2751         }
2752         spin_unlock_bh(&iface_stat_list_lock);
2753
2754         *eof = 1;
2755         return ppi.outp - page;
2756 }
2757
2758 /*------------------------------------------*/
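/*
 * Opening the QTU_DEV_NAME misc device registers the calling process (keyed
 * by tgid) in proc_qtu_data_tree and ties it to the uid_tag_data of
 * current_fsuid(), so its socket tags can be cleaned up on release.
 * Only one open per tgid is allowed; a second one fails with -EBUSY.
 */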
2759 static int qtudev_open(struct inode *inode, struct file *file)
2760 {
2761         struct uid_tag_data *utd_entry;
2762         struct proc_qtu_data  *pqd_entry;
2763         struct proc_qtu_data  *new_pqd_entry;
2764         int res;
2765         bool utd_entry_found;
2766
2767         if (unlikely(qtu_proc_handling_passive))
2768                 return 0;
2769
2770         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2771                  current->pid, current->tgid, current_fsuid());
2772
2773         spin_lock_bh(&uid_tag_data_tree_lock);
2774
2775         /* Look for existing uid data, or alloc one. */
2776         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2777         if (IS_ERR_OR_NULL(utd_entry)) {
2778                 res = PTR_ERR(utd_entry);
2779                 goto err_unlock;
2780         }
2781
2782         /* Look for existing PID based proc_data */
2783         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2784                                               current->tgid);
2785         if (pqd_entry) {
2786                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2787                        "%s already opened\n",
2788                        current->pid, current->tgid, current_fsuid(),
2789                        QTU_DEV_NAME);
2790                 res = -EBUSY;
2791                 goto err_unlock_free_utd;
2792         }
2793
2794         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2795         if (!new_pqd_entry) {
2796                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2797                        "proc data alloc failed\n",
2798                        current->pid, current->tgid, current_fsuid());
2799                 res = -ENOMEM;
2800                 goto err_unlock_free_utd;
2801         }
2802         new_pqd_entry->pid = current->tgid;
2803         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2804         new_pqd_entry->parent_tag_data = utd_entry;
2805         utd_entry->num_pqd++;
2806
2807         proc_qtu_data_tree_insert(new_pqd_entry,
2808                                   &proc_qtu_data_tree);
2809
2810         spin_unlock_bh(&uid_tag_data_tree_lock);
2811         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2812                  current_fsuid(), new_pqd_entry);
2813         file->private_data = new_pqd_entry;
2814         return 0;
2815
2816 err_unlock_free_utd:
2817         if (!utd_entry_found) {
2818                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2819                 kfree(utd_entry);
2820         }
2821 err_unlock:
2822         spin_unlock_bh(&uid_tag_data_tree_lock);
2823         return res;
2824 }
2825
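/*
 * On release, untag every socket this process tagged (tracked on its
 * sock_tag_list), drop the per-process proc_qtu_data and release the
 * uid_tag_data references.  The sockfd puts are deferred to
 * sock_tag_tree_erase() since they cannot be done while holding the
 * spinlocks.
 */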
2826 static int qtudev_release(struct inode *inode, struct file *file)
2827 {
2828         struct proc_qtu_data  *pqd_entry = file->private_data;
2829         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2830         struct sock_tag *st_entry;
2831         struct rb_root st_to_free_tree = RB_ROOT;
2832         struct list_head *entry, *next;
2833         struct tag_ref *tr;
2834
2835         if (unlikely(qtu_proc_handling_passive))
2836                 return 0;
2837
2838         /*
2839          * Do not trust current->pid; it might just be a kworker cleaning
2840          * up after a dead proc.
2841          */
2842         DR_DEBUG("qtaguid: qtudev_release(): "
2843                  "pid=%u tgid=%u uid=%u "
2844                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2845                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2846                  pqd_entry, pqd_entry->pid, utd_entry,
2847                  utd_entry->num_active_tags);
2848
2849         spin_lock_bh(&sock_tag_list_lock);
2850         spin_lock_bh(&uid_tag_data_tree_lock);
2851
2852         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2853                 st_entry = list_entry(entry, struct sock_tag, list);
2854                 DR_DEBUG("qtaguid: %s(): "
2855                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2856                          __func__,
2857                          st_entry, st_entry->sk,
2858                          current->pid, current->tgid,
2859                          pqd_entry->parent_tag_data->uid);
2860
2861                 utd_entry = uid_tag_data_tree_search(
2862                         &uid_tag_data_tree,
2863                         get_uid_from_tag(st_entry->tag));
2864                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2865                 DR_DEBUG("qtaguid: %s(): "
2866                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2867                          st_entry->tag, utd_entry);
2868                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2869                                          st_entry->tag);
2870                 BUG_ON(!tr);
2871                 BUG_ON(tr->num_sock_tags <= 0);
2872                 tr->num_sock_tags--;
2873                 free_tag_ref_from_utd_entry(tr, utd_entry);
2874
2875                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2876                 list_del(&st_entry->list);
2877                 /* Can't sockfd_put() within spinlock, do it later. */
2878                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2879
2880                 /*
2881                  * Try to free the utd_entry if no other proc_qtu_data is
2882                  * using it (num_pqd is 0) and it doesn't have active tags
2883                  * (num_active_tags is 0).
2884                  */
2885                 put_utd_entry(utd_entry);
2886         }
2887
2888         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2889         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2890         pqd_entry->parent_tag_data->num_pqd--;
2891         put_utd_entry(pqd_entry->parent_tag_data);
2892         kfree(pqd_entry);
2893         file->private_data = NULL;
2894
2895         spin_unlock_bh(&uid_tag_data_tree_lock);
2896         spin_unlock_bh(&sock_tag_list_lock);
2897
2898
2899         sock_tag_tree_erase(&st_to_free_tree);
2900
2901         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2902                            current->pid, current->tgid);
2903         return 0;
2904 }
2905
2906 /*------------------------------------------*/
2907 static const struct file_operations qtudev_fops = {
2908         .owner = THIS_MODULE,
2909         .open = qtudev_open,
2910         .release = qtudev_release,
2911 };
2912
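/*
 * Misc device userspace opens so that the module can track, per process,
 * which sockets were tagged and clean them up on close (see qtudev_open()
 * and qtudev_release() above).
 */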
2913 static struct miscdevice qtu_device = {
2914         .minor = MISC_DYNAMIC_MINOR,
2915         .name = QTU_DEV_NAME,
2916         .fops = &qtudev_fops,
2917         /* Sadly it does not allow setting a default mode, e.g. .mode = S_IRUGO | S_IWUSR */
2918 };
2919
2920 /*------------------------------------------*/
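/*
 * Create the /proc/net/xt_qtaguid/ directory with its "ctrl" and "stats"
 * entries and hook up their read/write handlers; userspace reads per-tag
 * counters from /proc/net/xt_qtaguid/stats.  On failure everything created
 * so far is torn down and an error is returned.
 */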
2921 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2922 {
2923         int ret;
2924         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2925         if (!*res_procdir) {
2926                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2927                 ret = -ENOMEM;
2928                 goto no_dir;
2929         }
2930
2931         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2932                                                 *res_procdir);
2933         if (!xt_qtaguid_ctrl_file) {
2934                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2935                         "file\n");
2936                 ret = -ENOMEM;
2937                 goto no_ctrl_entry;
2938         }
2939         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2940         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2941
2942         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2943                                                 *res_procdir);
2944         if (!xt_qtaguid_stats_file) {
2945                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2946                         "file\n");
2947                 ret = -ENOMEM;
2948                 goto no_stats_entry;
2949         }
2950         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2951         /*
2952          * TODO: add support for counter hacking
2953          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2954          */
2955         return 0;
2956
2957 no_stats_entry:
2958         remove_proc_entry("ctrl", *res_procdir);
2959 no_ctrl_entry:
2960         remove_proc_entry(module_procdirname, init_net.proc_net);
2961 no_dir:
2962         return ret;
2963 }
2964
2965 static struct xt_match qtaguid_mt_reg __read_mostly = {
2966         /*
2967          * This module masquerades as the "owner" module so that iptables
2968          * tools can deal with it.
2969          */
2970         .name       = "owner",
2971         .revision   = 1,
2972         .family     = NFPROTO_UNSPEC,
2973         .match      = qtaguid_mt,
2974         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2975         .me         = THIS_MODULE,
2976 };
2977
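/*
 * Module init: register the proc entries, the per-interface stat handling,
 * the xtables "owner" match and the QTU_DEV_NAME misc device.  Any failure
 * aborts loading.
 */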
2978 static int __init qtaguid_mt_init(void)
2979 {
2980         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2981             || iface_stat_init(xt_qtaguid_procdir)
2982             || xt_register_match(&qtaguid_mt_reg)
2983             || misc_register(&qtu_device))
2984                 return -1;
2985         return 0;
2986 }
2987
2988 /*
2989  * TODO: allow unloading of the module.
2990  * For now stats are permanent.
2991  * Kconfig forces 'y/n' and never an 'm'.
2992  */
2993
2994 module_init(qtaguid_mt_init);
2995 MODULE_AUTHOR("jpa <jpa@google.com>");
2996 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2997 MODULE_LICENSE("GPL");
2998 MODULE_ALIAS("ipt_owner");
2999 MODULE_ALIAS("ip6t_owner");
3000 MODULE_ALIAS("ipt_qtaguid");
3001 MODULE_ALIAS("ip6t_qtaguid");