1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
25 #include <net/sock.h>
26 #include <net/tcp.h>
27 #include <net/udp.h>
28
29 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
30 #include <linux/netfilter_ipv6/ip6_tables.h>
31 #endif
32
33 #include <linux/netfilter/xt_socket.h>
34 #include "xt_qtaguid_internal.h"
35 #include "xt_qtaguid_print.h"
36
37 /*
38  * Only use the xt_socket functions from hook contexts similar to xt_socket's
39  * own; elsewhere their return values are not meaningful.
40  */
41 #define XT_SOCKET_SUPPORTED_HOOKS \
42         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
43
44
45 static const char *module_procdirname = "xt_qtaguid";
46 static struct proc_dir_entry *xt_qtaguid_procdir;
47
48 static unsigned int proc_iface_perms = S_IRUGO;
49 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
50
51 static struct proc_dir_entry *xt_qtaguid_stats_file;
52 static unsigned int proc_stats_perms = S_IRUGO;
53 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
54
55 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
56
57 /* Everybody can write, but proc_ctrl_write_limited is true by default, which
58  * limits what can be controlled. See the can_*() functions.
59  */
60 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
61 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
62
63 /* Limited by default, so the gid of the ctrl and stats proc entries
64  * will limit what can be done. See the can_*() functions.
65  */
66 static bool proc_stats_readall_limited = true;
67 static bool proc_ctrl_write_limited = true;
68
69 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
70                    S_IRUGO | S_IWUSR);
71 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
72                    S_IRUGO | S_IWUSR);
73
74 /*
75  * Limit the number of active tags (via socket tags) for a given UID.
76  * Multiple processes could share the UID.
77  */
78 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
79 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
80
81 /*
82  * After the kernel has initialized this module, it is still possible
83  * to make it passive.
84  * Setting passive to Y:
85  *  - the iface stats handling will not act on notifications.
86  *  - iptables matches will never match.
87  *  - ctrl commands silently succeed.
88  *  - stats are always empty.
89  * This is mostly useful when a bug is suspected.
90  */
91 static bool module_passive;
92 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
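/*
 * Usage sketch (assumes the standard /sys/module layout): since the knobs in
 * this file are declared via module_param_named() with S_IWUSR, they can be
 * toggled at runtime, e.g.:
 *   echo 1 > /sys/module/xt_qtaguid/parameters/passive
 */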
93
94 /*
95  * Control how qtaguid data is tracked per proc/uid.
96  * Setting tag_tracking_passive to Y:
97  *  - don't create proc specific structs to track tags
98  *  - don't check whether active tag stats exceed the limits.
99  *  - don't clean up socket tags on process exits.
100  * This is mostly useful when a bug is suspected.
101  */
102 static bool qtu_proc_handling_passive;
103 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
104                    S_IRUGO | S_IWUSR);
105
106 #define QTU_DEV_NAME "xt_qtaguid"
107
108 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
109 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
110
111 /*---------------------------------------------------------------------------*/
112 static const char *iface_stat_procdirname = "iface_stat";
113 static struct proc_dir_entry *iface_stat_procdir;
114 /*
115  * The iface_stat_all* will go away once userspace gets used to the new fields
116  * that have a format line.
117  */
118 static const char *iface_stat_all_procfilename = "iface_stat_all";
119 static struct proc_dir_entry *iface_stat_all_procfile;
120 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
121 static struct proc_dir_entry *iface_stat_fmt_procfile;
122
123
124 /*
125  * Ordering of locks:
126  *  outer locks:
127  *    iface_stat_list_lock
128  *    sock_tag_list_lock
129  *  inner locks:
130  *    uid_tag_data_tree_lock
131  *    tag_counter_set_list_lock
132  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
133  * is acquired.
134  *
135  * Call tree with all lock holders as of 2012-04-27:
136  *
137  * iface_stat_fmt_proc_read()
138  *   iface_stat_list_lock
139  *     (struct iface_stat)
140  *
141  * qtaguid_ctrl_proc_read()
142  *   sock_tag_list_lock
143  *     (sock_tag_tree)
144  *     (struct proc_qtu_data->sock_tag_list)
145  *   prdebug_full_state()
146  *     sock_tag_list_lock
147  *       (sock_tag_tree)
148  *     uid_tag_data_tree_lock
149  *       (uid_tag_data_tree)
150  *       (proc_qtu_data_tree)
151  *     iface_stat_list_lock
152  *
153  * qtaguid_stats_proc_read()
154  *   iface_stat_list_lock
155  *     struct iface_stat->tag_stat_list_lock
156  *
157  * qtudev_open()
158  *   uid_tag_data_tree_lock
159  *
160  * qtudev_release()
161  *   sock_tag_data_list_lock
162  *     uid_tag_data_tree_lock
163  *   prdebug_full_state()
164  *     sock_tag_list_lock
165  *     uid_tag_data_tree_lock
166  *     iface_stat_list_lock
167  *
168  * iface_netdev_event_handler()
169  *   iface_stat_create()
170  *     iface_stat_list_lock
171  *   iface_stat_update()
172  *     iface_stat_list_lock
173  *
174  * iface_inetaddr_event_handler()
175  *   iface_stat_create()
176  *     iface_stat_list_lock
177  *   iface_stat_update()
178  *     iface_stat_list_lock
179  *
180  * iface_inet6addr_event_handler()
181  *   iface_stat_create_ipv6()
182  *     iface_stat_list_lock
183  *   iface_stat_update()
184  *     iface_stat_list_lock
185  *
186  * qtaguid_mt()
187  *   account_for_uid()
188  *     if_tag_stat_update()
189  *       get_sock_stat()
190  *         sock_tag_list_lock
191  *       struct iface_stat->tag_stat_list_lock
192  *         tag_stat_update()
193  *           get_active_counter_set()
194  *             tag_counter_set_list_lock
195  *         tag_stat_update()
196  *           get_active_counter_set()
197  *             tag_counter_set_list_lock
198  *
199  *
200  * qtaguid_ctrl_parse()
201  *   ctrl_cmd_delete()
202  *     sock_tag_list_lock
203  *     tag_counter_set_list_lock
204  *     iface_stat_list_lock
205  *       struct iface_stat->tag_stat_list_lock
206  *     uid_tag_data_tree_lock
207  *   ctrl_cmd_counter_set()
208  *     tag_counter_set_list_lock
209  *   ctrl_cmd_tag()
210  *     sock_tag_list_lock
211  *       (sock_tag_tree)
212  *       get_tag_ref()
213  *         uid_tag_data_tree_lock
214  *           (uid_tag_data_tree)
215  *       uid_tag_data_tree_lock
216  *         (proc_qtu_data_tree)
217  *   ctrl_cmd_untag()
218  *     sock_tag_list_lock
219  *     uid_tag_data_tree_lock
220  *
221  */
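/*
 * Sketch of the expected nesting (illustrative, mirroring ctrl_cmd_tag() in
 * the call tree above): always take the outer lock before the inner one, e.g.
 *
 *   spin_lock_bh(&sock_tag_list_lock);
 *   spin_lock_bh(&uid_tag_data_tree_lock);
 *   ...
 *   spin_unlock_bh(&uid_tag_data_tree_lock);
 *   spin_unlock_bh(&sock_tag_list_lock);
 */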
222 static LIST_HEAD(iface_stat_list);
223 static DEFINE_SPINLOCK(iface_stat_list_lock);
224
225 static struct rb_root sock_tag_tree = RB_ROOT;
226 static DEFINE_SPINLOCK(sock_tag_list_lock);
227
228 static struct rb_root tag_counter_set_tree = RB_ROOT;
229 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
230
231 static struct rb_root uid_tag_data_tree = RB_ROOT;
232 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
233
234 static struct rb_root proc_qtu_data_tree = RB_ROOT;
235 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
236
237 static struct qtaguid_event_counts qtu_events;
238 /*----------------------------------------------*/
239 static bool can_manipulate_uids(void)
240 {
241         /* root pwnd */
242         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
243                 || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
244                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
245 }
246
247 static bool can_impersonate_uid(uid_t uid)
248 {
249         return uid == current_fsuid() || can_manipulate_uids();
250 }
251
252 static bool can_read_other_uid_stats(uid_t uid)
253 {
254         /* root pwnd */
255         return in_egroup_p(xt_qtaguid_stats_file->gid)
256                 || unlikely(!current_fsuid()) || uid == current_fsuid()
257                 || unlikely(!proc_stats_readall_limited)
258                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
259 }
260
261 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
262                                   enum ifs_tx_rx direction,
263                                   enum ifs_proto ifs_proto,
264                                   int bytes,
265                                   int packets)
266 {
267         counters->bpc[set][direction][ifs_proto].bytes += bytes;
268         counters->bpc[set][direction][ifs_proto].packets += packets;
269 }
270
271 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
272 {
273         struct rb_node *node = root->rb_node;
274
275         while (node) {
276                 struct tag_node *data = rb_entry(node, struct tag_node, node);
277                 int result;
278                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
279                          " node=%p data=%p\n", tag, node, data);
280                 result = tag_compare(tag, data->tag);
281                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
282                          " data.tag=0x%llx (uid=%u) res=%d\n",
283                          tag, data->tag, get_uid_from_tag(data->tag), result);
284                 if (result < 0)
285                         node = node->rb_left;
286                 else if (result > 0)
287                         node = node->rb_right;
288                 else
289                         return data;
290         }
291         return NULL;
292 }
293
294 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
295 {
296         struct rb_node **new = &(root->rb_node), *parent = NULL;
297
298         /* Figure out where to put new node */
299         while (*new) {
300                 struct tag_node *this = rb_entry(*new, struct tag_node,
301                                                  node);
302                 int result = tag_compare(data->tag, this->tag);
303                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
304                          " (uid=%u)\n", __func__,
305                          this->tag,
306                          get_uid_from_tag(this->tag));
307                 parent = *new;
308                 if (result < 0)
309                         new = &((*new)->rb_left);
310                 else if (result > 0)
311                         new = &((*new)->rb_right);
312                 else
313                         BUG();
314         }
315
316         /* Add new node and rebalance tree. */
317         rb_link_node(&data->node, parent, new);
318         rb_insert_color(&data->node, root);
319 }
320
321 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
322 {
323         tag_node_tree_insert(&data->tn, root);
324 }
325
326 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
327 {
328         struct tag_node *node = tag_node_tree_search(root, tag);
329         if (!node)
330                 return NULL;
331         return rb_entry(&node->node, struct tag_stat, tn.node);
332 }
333
334 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
335                                         struct rb_root *root)
336 {
337         tag_node_tree_insert(&data->tn, root);
338 }
339
340 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
341                                                            tag_t tag)
342 {
343         struct tag_node *node = tag_node_tree_search(root, tag);
344         if (!node)
345                 return NULL;
346         return rb_entry(&node->node, struct tag_counter_set, tn.node);
347
348 }
349
350 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
351 {
352         tag_node_tree_insert(&data->tn, root);
353 }
354
355 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
356 {
357         struct tag_node *node = tag_node_tree_search(root, tag);
358         if (!node)
359                 return NULL;
360         return rb_entry(&node->node, struct tag_ref, tn.node);
361 }
362
363 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
364                                              const struct sock *sk)
365 {
366         struct rb_node *node = root->rb_node;
367
368         while (node) {
369                 struct sock_tag *data = rb_entry(node, struct sock_tag,
370                                                  sock_node);
371                 if (sk < data->sk)
372                         node = node->rb_left;
373                 else if (sk > data->sk)
374                         node = node->rb_right;
375                 else
376                         return data;
377         }
378         return NULL;
379 }
380
381 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
382 {
383         struct rb_node **new = &(root->rb_node), *parent = NULL;
384
385         /* Figure out where to put new node */
386         while (*new) {
387                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
388                                                  sock_node);
389                 parent = *new;
390                 if (data->sk < this->sk)
391                         new = &((*new)->rb_left);
392                 else if (data->sk > this->sk)
393                         new = &((*new)->rb_right);
394                 else
395                         BUG();
396         }
397
398         /* Add new node and rebalance tree. */
399         rb_link_node(&data->sock_node, parent, new);
400         rb_insert_color(&data->sock_node, root);
401 }
402
403 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
404 {
405         struct rb_node *node;
406         struct sock_tag *st_entry;
407
408         node = rb_first(st_to_free_tree);
409         while (node) {
410                 st_entry = rb_entry(node, struct sock_tag, sock_node);
411                 node = rb_next(node);
412                 CT_DEBUG("qtaguid: %s(): "
413                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
414                          st_entry->sk,
415                          st_entry->tag,
416                          get_uid_from_tag(st_entry->tag));
417                 rb_erase(&st_entry->sock_node, st_to_free_tree);
418                 sockfd_put(st_entry->socket);
419                 kfree(st_entry);
420         }
421 }
422
423 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
424                                                        const pid_t pid)
425 {
426         struct rb_node *node = root->rb_node;
427
428         while (node) {
429                 struct proc_qtu_data *data = rb_entry(node,
430                                                       struct proc_qtu_data,
431                                                       node);
432                 if (pid < data->pid)
433                         node = node->rb_left;
434                 else if (pid > data->pid)
435                         node = node->rb_right;
436                 else
437                         return data;
438         }
439         return NULL;
440 }
441
442 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
443                                       struct rb_root *root)
444 {
445         struct rb_node **new = &(root->rb_node), *parent = NULL;
446
447         /* Figure out where to put new node */
448         while (*new) {
449                 struct proc_qtu_data *this = rb_entry(*new,
450                                                       struct proc_qtu_data,
451                                                       node);
452                 parent = *new;
453                 if (data->pid < this->pid)
454                         new = &((*new)->rb_left);
455                 else if (data->pid > this->pid)
456                         new = &((*new)->rb_right);
457                 else
458                         BUG();
459         }
460
461         /* Add new node and rebalance tree. */
462         rb_link_node(&data->node, parent, new);
463         rb_insert_color(&data->node, root);
464 }
465
466 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
467                                      struct rb_root *root)
468 {
469         struct rb_node **new = &(root->rb_node), *parent = NULL;
470
471         /* Figure out where to put new node */
472         while (*new) {
473                 struct uid_tag_data *this = rb_entry(*new,
474                                                      struct uid_tag_data,
475                                                      node);
476                 parent = *new;
477                 if (data->uid < this->uid)
478                         new = &((*new)->rb_left);
479                 else if (data->uid > this->uid)
480                         new = &((*new)->rb_right);
481                 else
482                         BUG();
483         }
484
485         /* Add new node and rebalance tree. */
486         rb_link_node(&data->node, parent, new);
487         rb_insert_color(&data->node, root);
488 }
489
490 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
491                                                      uid_t uid)
492 {
493         struct rb_node *node = root->rb_node;
494
495         while (node) {
496                 struct uid_tag_data *data = rb_entry(node,
497                                                      struct uid_tag_data,
498                                                      node);
499                 if (uid < data->uid)
500                         node = node->rb_left;
501                 else if (uid > data->uid)
502                         node = node->rb_right;
503                 else
504                         return data;
505         }
506         return NULL;
507 }
508
509 /*
510  * Allocates a new uid_tag_data struct if needed.
511  * Returns a pointer to the found or allocated uid_tag_data.
512  * Returns a PTR_ERR on failures, and lock is not held.
513  * If found_res is not NULL:
514  *   sets *found_res to true if the entry already existed,
515  *   and to false if it had to be allocated.
516  */
517 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
518 {
519         struct uid_tag_data *utd_entry;
520
521         /* Look for top level uid_tag_data for the UID */
522         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
523         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
524
525         if (found_res)
526                 *found_res = utd_entry;
527         if (utd_entry)
528                 return utd_entry;
529
530         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
531         if (!utd_entry) {
532                 pr_err("qtaguid: get_uid_data(%u): "
533                        "tag data alloc failed\n", uid);
534                 return ERR_PTR(-ENOMEM);
535         }
536
537         utd_entry->uid = uid;
538         utd_entry->tag_ref_tree = RB_ROOT;
539         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
540         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
541         return utd_entry;
542 }
543
544 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
545 static struct tag_ref *new_tag_ref(tag_t new_tag,
546                                    struct uid_tag_data *utd_entry)
547 {
548         struct tag_ref *tr_entry;
549         int res;
550
551         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
552                 pr_info("qtaguid: new_tag_ref(0x%llx): "
553                         "tag ref alloc quota exceeded. max=%d\n",
554                         new_tag, max_sock_tags);
555                 res = -EMFILE;
556                 goto err_res;
557
558         }
559
560         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
561         if (!tr_entry) {
562                 pr_err("qtaguid: new_tag_ref(0x%llx): "
563                        "tag ref alloc failed\n",
564                        new_tag);
565                 res = -ENOMEM;
566                 goto err_res;
567         }
568         tr_entry->tn.tag = new_tag;
569         /* tr_entry->num_sock_tags  handled by caller */
570         utd_entry->num_active_tags++;
571         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
572         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
573                  " inserted new tag ref %p\n",
574                  new_tag, tr_entry);
575         return tr_entry;
576
577 err_res:
578         return ERR_PTR(res);
579 }
580
581 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
582                                       struct uid_tag_data **utd_res)
583 {
584         struct uid_tag_data *utd_entry;
585         struct tag_ref *tr_entry;
586         bool found_utd;
587         uid_t uid = get_uid_from_tag(full_tag);
588
589         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
590                  full_tag, uid);
591
592         utd_entry = get_uid_data(uid, &found_utd);
593         if (IS_ERR_OR_NULL(utd_entry)) {
594                 if (utd_res)
595                         *utd_res = utd_entry;
596                 return NULL;
597         }
598
599         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
600         if (utd_res)
601                 *utd_res = utd_entry;
602         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
603                  full_tag, utd_entry, tr_entry);
604         return tr_entry;
605 }
606
607 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
608 static struct tag_ref *get_tag_ref(tag_t full_tag,
609                                    struct uid_tag_data **utd_res)
610 {
611         struct uid_tag_data *utd_entry;
612         struct tag_ref *tr_entry;
613
614         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
615                  full_tag);
616         spin_lock_bh(&uid_tag_data_tree_lock);
617         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
618         BUG_ON(IS_ERR_OR_NULL(utd_entry));
619         if (!tr_entry)
620                 tr_entry = new_tag_ref(full_tag, utd_entry);
621
622         spin_unlock_bh(&uid_tag_data_tree_lock);
623         if (utd_res)
624                 *utd_res = utd_entry;
625         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
626                  full_tag, utd_entry, tr_entry);
627         return tr_entry;
628 }
629
630 /* Checks and maybe frees the UID Tag Data entry */
631 static void put_utd_entry(struct uid_tag_data *utd_entry)
632 {
633         /* Are we done with the UID tag data entry? */
634         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
635                 !utd_entry->num_pqd) {
636                 DR_DEBUG("qtaguid: %s(): "
637                          "erase utd_entry=%p uid=%u "
638                          "by pid=%u tgid=%u uid=%u\n", __func__,
639                          utd_entry, utd_entry->uid,
640                          current->pid, current->tgid, current_fsuid());
641                 BUG_ON(utd_entry->num_active_tags);
642                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
643                 kfree(utd_entry);
644         } else {
645                 DR_DEBUG("qtaguid: %s(): "
646                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
647                          __func__, utd_entry, utd_entry->num_active_tags,
648                          utd_entry->num_pqd);
649                 BUG_ON(!(utd_entry->num_active_tags ||
650                          utd_entry->num_pqd));
651         }
652 }
653
654 /*
655  * If no sock_tags are using this tag_ref,
656  * decrements refcount of utd_entry, removes tr_entry
657  * from utd_entry->tag_ref_tree and frees it.
658  */
659 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
660                                         struct uid_tag_data *utd_entry)
661 {
662         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
663                  tr_entry, tr_entry->tn.tag,
664                  get_uid_from_tag(tr_entry->tn.tag));
665         if (!tr_entry->num_sock_tags) {
666                 BUG_ON(!utd_entry->num_active_tags);
667                 utd_entry->num_active_tags--;
668                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
669                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
670                 kfree(tr_entry);
671         }
672 }
673
674 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
675 {
676         struct rb_node *node;
677         struct tag_ref *tr_entry;
678         tag_t acct_tag;
679
680         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
681                  full_tag, get_uid_from_tag(full_tag));
682         acct_tag = get_atag_from_tag(full_tag);
683         node = rb_first(&utd_entry->tag_ref_tree);
684         while (node) {
685                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
686                 node = rb_next(node);
687                 if (!acct_tag || tr_entry->tn.tag == full_tag)
688                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
689         }
690 }
691
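/*
 * The two helpers below use the legacy procfs read callback convention
 * (create_proc_read_entry/read_proc): write the value into the page buffer,
 * adjust *start and *eof for partial reads, and return the byte count.
 */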
692 static int read_proc_u64(char *page, char **start, off_t off,
693                         int count, int *eof, void *data)
694 {
695         int len;
696         uint64_t value;
697         char *p = page;
698         uint64_t *iface_entry = data;
699
700         if (!data)
701                 return 0;
702
703         value = *iface_entry;
704         p += sprintf(p, "%llu\n", value);
705         len = (p - page) - off;
706         *eof = (len <= count) ? 1 : 0;
707         *start = page + off;
708         return len;
709 }
710
711 static int read_proc_bool(char *page, char **start, off_t off,
712                         int count, int *eof, void *data)
713 {
714         int len;
715         bool value;
716         char *p = page;
717         bool *bool_entry = data;
718
719         if (!data)
720                 return 0;
721
722         value = *bool_entry;
723         p += sprintf(p, "%u\n", value);
724         len = (p - page) - off;
725         *eof = (len <= count) ? 1 : 0;
726         *start = page + off;
727         return len;
728 }
729
730 static int get_active_counter_set(tag_t tag)
731 {
732         int active_set = 0;
733         struct tag_counter_set *tcs;
734
735         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
736                  " (uid=%u)\n",
737                  tag, get_uid_from_tag(tag));
738         /* For now we only handle UID tags for active sets */
739         tag = get_utag_from_tag(tag);
740         spin_lock_bh(&tag_counter_set_list_lock);
741         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
742         if (tcs)
743                 active_set = tcs->active_set;
744         spin_unlock_bh(&tag_counter_set_list_lock);
745         return active_set;
746 }
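/*
 * Usage note (userspace convention, not enforced by this module): the counter
 * set selected per UID via ctrl_cmd_counter_set() is typically 0 for
 * background and 1 for foreground traffic; this helper just reports whichever
 * set was last selected, defaulting to 0.
 */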
747
748 /*
749  * Find the entry for tracking the specified interface.
750  * Caller must hold iface_stat_list_lock
751  */
752 static struct iface_stat *get_iface_entry(const char *ifname)
753 {
754         struct iface_stat *iface_entry;
755
756         /* Can't look anything up without an interface name */
757         if (ifname == NULL) {
758                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
759                 return NULL;
760         }
761
762         /* Iterate over interfaces */
763         list_for_each_entry(iface_entry, &iface_stat_list, list) {
764                 if (!strcmp(ifname, iface_entry->ifname))
765                         goto done;
766         }
767         iface_entry = NULL;
768 done:
769         return iface_entry;
770 }
771
772 /* This is for fmt2 only */
773 static int pp_iface_stat_line(bool header, char *outp,
774                               int char_count, struct iface_stat *iface_entry)
775 {
776         int len;
777         if (header) {
778                 len = snprintf(outp, char_count,
779                                "ifname "
780                                "total_skb_rx_bytes total_skb_rx_packets "
781                                "total_skb_tx_bytes total_skb_tx_packets "
782                                "rx_tcp_bytes rx_tcp_packets "
783                                "rx_udp_bytes rx_udp_packets "
784                                "rx_other_bytes rx_other_packets "
785                                "tx_tcp_bytes tx_tcp_packets "
786                                "tx_udp_bytes tx_udp_packets "
787                                "tx_other_bytes tx_other_packets\n"
788                         );
789         } else {
790                 struct data_counters *cnts;
791                 int cnt_set = 0;   /* We only use one set for the device */
792                 cnts = &iface_entry->totals_via_skb;
793                 len = snprintf(
794                         outp, char_count,
795                         "%s "
796                         "%llu %llu %llu %llu %llu %llu %llu %llu "
797                         "%llu %llu %llu %llu %llu %llu %llu %llu\n",
798                         iface_entry->ifname,
799                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
800                         dc_sum_packets(cnts, cnt_set, IFS_RX),
801                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
802                         dc_sum_packets(cnts, cnt_set, IFS_TX),
803                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
804                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
805                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
806                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
807                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
808                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
809                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
810                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
811                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
812                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
813                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
814                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
815         }
816         return len;
817 }
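/*
 * Illustrative fmt2 output line (hypothetical numbers; the four totals are the
 * sums of the matching per-protocol columns):
 *   wlan0 4500 12 900 10 3000 8 1000 3 500 1 600 7 200 2 100 1
 */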
818
819 static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
820                                     off_t items_to_skip, int char_count,
821                                     int *eof, void *data)
822 {
823         char *outp = page;
824         int item_index = 0;
825         int len;
826         int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
827         struct iface_stat *iface_entry;
828         struct rtnl_link_stats64 dev_stats, *stats;
829         struct rtnl_link_stats64 no_dev_stats = {0};
830
831         if (unlikely(module_passive)) {
832                 *eof = 1;
833                 return 0;
834         }
835
836         CT_DEBUG("qtaguid:proc iface_stat_fmt "
837                  "pid=%u tgid=%u uid=%u "
838                  "page=%p *num_items_returned=%p off=%ld "
839                  "char_count=%d *eof=%d\n",
840                  current->pid, current->tgid, current_fsuid(),
841                  page, *num_items_returned,
842                  items_to_skip, char_count, *eof);
843
844         if (*eof)
845                 return 0;
846
847         if (fmt == 2 && item_index++ >= items_to_skip) {
848                 len = pp_iface_stat_line(true, outp, char_count, NULL);
849                 if (len >= char_count) {
850                         *outp = '\0';
851                         return outp - page;
852                 }
853                 outp += len;
854                 char_count -= len;
855                 (*num_items_returned)++;
856         }
857
858         /*
859          * This lock will prevent iface_stat_update() from changing active,
860          * and in turn prevent an interface from unregistering itself.
861          */
862         spin_lock_bh(&iface_stat_list_lock);
863         list_for_each_entry(iface_entry, &iface_stat_list, list) {
864                 if (item_index++ < items_to_skip)
865                         continue;
866
867                 if (iface_entry->active) {
868                         stats = dev_get_stats(iface_entry->net_dev,
869                                               &dev_stats);
870                 } else {
871                         stats = &no_dev_stats;
872                 }
873                 /*
874                  * If the meaning of the data changes, then update the fmtX
875                  * string.
876                  */
877                 if (fmt == 1) {
878                         len = snprintf(
879                                 outp, char_count,
880                                 "%s %d "
881                                 "%llu %llu %llu %llu "
882                                 "%llu %llu %llu %llu\n",
883                                 iface_entry->ifname,
884                                 iface_entry->active,
885                                 iface_entry->totals_via_dev[IFS_RX].bytes,
886                                 iface_entry->totals_via_dev[IFS_RX].packets,
887                                 iface_entry->totals_via_dev[IFS_TX].bytes,
888                                 iface_entry->totals_via_dev[IFS_TX].packets,
889                                 stats->rx_bytes, stats->rx_packets,
890                                 stats->tx_bytes, stats->tx_packets
891                                 );
892                 } else {
893                         len = pp_iface_stat_line(false, outp, char_count,
894                                                  iface_entry);
895                 }
896                 if (len >= char_count) {
897                         spin_unlock_bh(&iface_stat_list_lock);
898                         *outp = '\0';
899                         return outp - page;
900                 }
901                 outp += len;
902                 char_count -= len;
903                 (*num_items_returned)++;
904         }
905         spin_unlock_bh(&iface_stat_list_lock);
906
907         *eof = 1;
908         return outp - page;
909 }
910
911 static void iface_create_proc_worker(struct work_struct *work)
912 {
913         struct proc_dir_entry *proc_entry;
914         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
915                                                    iface_work);
916         struct iface_stat *new_iface  = isw->iface_entry;
917
918         /* iface_entries are not deleted, so safe to manipulate. */
919         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
920         if (IS_ERR_OR_NULL(proc_entry)) {
921                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
922                 kfree(isw);
923                 return;
924         }
925
926         new_iface->proc_ptr = proc_entry;
927
928         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
929                                read_proc_u64,
930                                &new_iface->totals_via_dev[IFS_TX].bytes);
931         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
932                                read_proc_u64,
933                                &new_iface->totals_via_dev[IFS_RX].bytes);
934         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
935                                read_proc_u64,
936                                &new_iface->totals_via_dev[IFS_TX].packets);
937         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
938                                read_proc_u64,
939                                &new_iface->totals_via_dev[IFS_RX].packets);
940         create_proc_read_entry("active", proc_iface_perms, proc_entry,
941                         read_proc_bool, &new_iface->active);
942
943         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
944                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
945         kfree(isw);
946 }
947
948 /*
949  * Sets the entry's active state and updates
950  * its net_dev pointer accordingly.
951  */
952 static void _iface_stat_set_active(struct iface_stat *entry,
953                                    struct net_device *net_dev,
954                                    bool activate)
955 {
956         if (activate) {
957                 entry->net_dev = net_dev;
958                 entry->active = true;
959                 IF_DEBUG("qtaguid: %s(%s): "
960                          "enable tracking. rfcnt=%d\n", __func__,
961                          entry->ifname,
962                          __this_cpu_read(*net_dev->pcpu_refcnt));
963         } else {
964                 entry->active = false;
965                 entry->net_dev = NULL;
966                 IF_DEBUG("qtaguid: %s(%s): "
967                          "disable tracking. rfcnt=%d\n", __func__,
968                          entry->ifname,
969                          __this_cpu_read(*net_dev->pcpu_refcnt));
970
971         }
972 }
973
974 /* Caller must hold iface_stat_list_lock */
975 static struct iface_stat *iface_alloc(struct net_device *net_dev)
976 {
977         struct iface_stat *new_iface;
978         struct iface_stat_work *isw;
979
980         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
981         if (new_iface == NULL) {
982                 pr_err("qtaguid: iface_stat: create(%s): "
983                        "iface_stat alloc failed\n", net_dev->name);
984                 return NULL;
985         }
986         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
987         if (new_iface->ifname == NULL) {
988                 pr_err("qtaguid: iface_stat: create(%s): "
989                        "ifname alloc failed\n", net_dev->name);
990                 kfree(new_iface);
991                 return NULL;
992         }
993         spin_lock_init(&new_iface->tag_stat_list_lock);
994         new_iface->tag_stat_tree = RB_ROOT;
995         _iface_stat_set_active(new_iface, net_dev, true);
996
997         /*
998          * ipv6 notifier chains are atomic :(
999          * No create_proc_read_entry() for you!
1000          */
1001         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
1002         if (!isw) {
1003                 pr_err("qtaguid: iface_stat: create(%s): "
1004                        "work alloc failed\n", new_iface->ifname);
1005                 _iface_stat_set_active(new_iface, net_dev, false);
1006                 kfree(new_iface->ifname);
1007                 kfree(new_iface);
1008                 return NULL;
1009         }
1010         isw->iface_entry = new_iface;
1011         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
1012         schedule_work(&isw->iface_work);
1013         list_add(&new_iface->list, &iface_stat_list);
1014         return new_iface;
1015 }
1016
1017 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
1018                                                struct iface_stat *iface)
1019 {
1020         struct rtnl_link_stats64 dev_stats, *stats;
1021         bool stats_rewound;
1022
1023         stats = dev_get_stats(net_dev, &dev_stats);
1024         /* No empty packets */
1025         stats_rewound =
1026                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
1027                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
1028
1029         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
1030                  "bytes rx/tx=%llu/%llu "
1031                  "active=%d last_known=%d "
1032                  "stats_rewound=%d\n", __func__,
1033                  net_dev ? net_dev->name : "?",
1034                  iface, net_dev,
1035                  stats->rx_bytes, stats->tx_bytes,
1036                  iface->active, iface->last_known_valid, stats_rewound);
1037
1038         if (iface->active && iface->last_known_valid && stats_rewound) {
1039                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
1040                              "iface reset its stats unexpectedly\n", __func__,
1041                              net_dev->name);
1042
1043                 iface->totals_via_dev[IFS_TX].bytes +=
1044                         iface->last_known[IFS_TX].bytes;
1045                 iface->totals_via_dev[IFS_TX].packets +=
1046                         iface->last_known[IFS_TX].packets;
1047                 iface->totals_via_dev[IFS_RX].bytes +=
1048                         iface->last_known[IFS_RX].bytes;
1049                 iface->totals_via_dev[IFS_RX].packets +=
1050                         iface->last_known[IFS_RX].packets;
1051                 iface->last_known_valid = false;
1052                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
1053                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
1054                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
1055                          iface->last_known[IFS_TX].bytes);
1056         }
1057 }
1058
1059 /*
1060  * Create a new entry for tracking the specified interface.
1061  * Do nothing if the entry already exists.
1062  * Called when an interface is configured with a valid IP address.
1063  */
1064 static void iface_stat_create(struct net_device *net_dev,
1065                               struct in_ifaddr *ifa)
1066 {
1067         struct in_device *in_dev = NULL;
1068         const char *ifname;
1069         struct iface_stat *entry;
1070         __be32 ipaddr = 0;
1071         struct iface_stat *new_iface;
1072
1073         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1074                  net_dev ? net_dev->name : "?",
1075                  ifa, net_dev);
1076         if (!net_dev) {
1077                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1078                 return;
1079         }
1080
1081         ifname = net_dev->name;
1082         if (!ifa) {
1083                 in_dev = in_dev_get(net_dev);
1084                 if (!in_dev) {
1085                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1086                                ifname);
1087                         return;
1088                 }
1089                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1090                          ifname, in_dev);
1091                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
1093                                  "ifa=%p ifa_label=%s\n",
1094                                  ifname, ifa,
1095                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
1096                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1097                                 break;
1098                 }
1099         }
1100
1101         if (!ifa) {
1102                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1103                          ifname);
1104                 goto done_put;
1105         }
1106         ipaddr = ifa->ifa_local;
1107
1108         spin_lock_bh(&iface_stat_list_lock);
1109         entry = get_iface_entry(ifname);
1110         if (entry != NULL) {
1111                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1112                          ifname, entry);
1113                 iface_check_stats_reset_and_adjust(net_dev, entry);
1114                 _iface_stat_set_active(entry, net_dev, true);
1115                 IF_DEBUG("qtaguid: %s(%s): "
1116                          "tracking now %d on ip=%pI4\n", __func__,
1117                          entry->ifname, true, &ipaddr);
1118                 goto done_unlock_put;
1119         }
1120
1121         new_iface = iface_alloc(net_dev);
1122         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1123                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1124 done_unlock_put:
1125         spin_unlock_bh(&iface_stat_list_lock);
1126 done_put:
1127         if (in_dev)
1128                 in_dev_put(in_dev);
1129 }
1130
1131 static void iface_stat_create_ipv6(struct net_device *net_dev,
1132                                    struct inet6_ifaddr *ifa)
1133 {
1134         struct in_device *in_dev;
1135         const char *ifname;
1136         struct iface_stat *entry;
1137         struct iface_stat *new_iface;
1138         int addr_type;
1139
1140         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1141                  ifa, net_dev, net_dev ? net_dev->name : "");
1142         if (!net_dev) {
1143                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1144                 return;
1145         }
1146         ifname = net_dev->name;
1147
1148         in_dev = in_dev_get(net_dev);
1149         if (!in_dev) {
1150                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1151                        ifname);
1152                 return;
1153         }
1154
1155         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1156                  ifname, in_dev);
1157
1158         if (!ifa) {
1159                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1160                          ifname);
1161                 goto done_put;
1162         }
1163         addr_type = ipv6_addr_type(&ifa->addr);
1164
1165         spin_lock_bh(&iface_stat_list_lock);
1166         entry = get_iface_entry(ifname);
1167         if (entry != NULL) {
1168                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1169                          ifname, entry);
1170                 iface_check_stats_reset_and_adjust(net_dev, entry);
1171                 _iface_stat_set_active(entry, net_dev, true);
1172                 IF_DEBUG("qtaguid: %s(%s): "
1173                          "tracking now %d on ip=%pI6c\n", __func__,
1174                          entry->ifname, true, &ifa->addr);
1175                 goto done_unlock_put;
1176         }
1177
1178         new_iface = iface_alloc(net_dev);
1179         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1180                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1181
1182 done_unlock_put:
1183         spin_unlock_bh(&iface_stat_list_lock);
1184 done_put:
1185         in_dev_put(in_dev);
1186 }
1187
1188 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1189 {
1190         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1191         return sock_tag_tree_search(&sock_tag_tree, sk);
1192 }
1193
1194 static struct sock_tag *get_sock_stat(const struct sock *sk)
1195 {
1196         struct sock_tag *sock_tag_entry;
1197         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1198         if (!sk)
1199                 return NULL;
1200         spin_lock_bh(&sock_tag_list_lock);
1201         sock_tag_entry = get_sock_stat_nl(sk);
1202         spin_unlock_bh(&sock_tag_list_lock);
1203         return sock_tag_entry;
1204 }
1205
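/*
 * Returns the transport protocol carried by the skb. For IPv6 a negative
 * value is returned when the transport header cannot be located; unknown
 * families fall back to IPPROTO_RAW. Either way the caller ends up counting
 * such packets under IFS_PROTO_OTHER in data_counters_update().
 */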
1206 static int ipx_proto(const struct sk_buff *skb,
1207                      struct xt_action_param *par)
1208 {
1209         int thoff = 0, tproto;
1210
1211         switch (par->family) {
1212         case NFPROTO_IPV6:
1213                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1214                 if (tproto < 0)
1215                         MT_DEBUG("%s(): transport header not found in ipv6"
1216                                  " skb=%p\n", __func__, skb);
1217                 break;
1218         case NFPROTO_IPV4:
1219                 tproto = ip_hdr(skb)->protocol;
1220                 break;
1221         default:
1222                 tproto = IPPROTO_RAW;
1223         }
1224         return tproto;
1225 }
1226
1227 static void
1228 data_counters_update(struct data_counters *dc, int set,
1229                      enum ifs_tx_rx direction, int proto, int bytes)
1230 {
1231         switch (proto) {
1232         case IPPROTO_TCP:
1233                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1234                 break;
1235         case IPPROTO_UDP:
1236                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1237                 break;
1238         case IPPROTO_IP:
1239         default:
1240                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1241                                     1);
1242                 break;
1243         }
1244 }
1245
1246 /*
1247  * Update stats for the specified interface. Do nothing if the entry
1248  * does not exist (when a device was never configured with an IP address).
1249  * Called when a device is being unregistered.
1250  */
1251 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1252 {
1253         struct rtnl_link_stats64 dev_stats, *stats;
1254         struct iface_stat *entry;
1255
1256         stats = dev_get_stats(net_dev, &dev_stats);
1257         spin_lock_bh(&iface_stat_list_lock);
1258         entry = get_iface_entry(net_dev->name);
1259         if (entry == NULL) {
1260                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1261                          net_dev->name);
1262                 spin_unlock_bh(&iface_stat_list_lock);
1263                 return;
1264         }
1265
1266         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1267                  net_dev->name, entry);
1268         if (!entry->active) {
1269                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1270                          net_dev->name);
1271                 spin_unlock_bh(&iface_stat_list_lock);
1272                 return;
1273         }
1274
1275         if (stash_only) {
1276                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1277                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1278                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1279                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1280                 entry->last_known_valid = true;
1281                 IF_DEBUG("qtaguid: %s(%s): "
1282                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1283                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1284                 spin_unlock_bh(&iface_stat_list_lock);
1285                 return;
1286         }
1287         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1288         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1289         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1290         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1291         /* We don't need the last_known[] anymore */
1292         entry->last_known_valid = false;
1293         _iface_stat_set_active(entry, net_dev, false);
1294         IF_DEBUG("qtaguid: %s(%s): "
1295                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1296                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1297         spin_unlock_bh(&iface_stat_list_lock);
1298 }
1299
1300 /*
1301  * Update stats for the specified interface from the skb.
1302  * Do nothing if the entry does not exist
1303  * (when a device was never configured with an IP address).
1304  * Called on each sk.
1305  */
1306 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1307                                        struct xt_action_param *par)
1308 {
1309         struct iface_stat *entry;
1310         const struct net_device *el_dev;
1311         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1312         int bytes = skb->len;
1313         int proto;
1314
1315         if (!skb->dev) {
1316                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1317                 el_dev = par->in ? : par->out;
1318         } else {
1319                 const struct net_device *other_dev;
1320                 el_dev = skb->dev;
1321                 other_dev = par->in ? : par->out;
1322                 if (el_dev != other_dev) {
1323                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1324                                  "par->(in/out)=%p %s\n",
1325                                  par->hooknum, el_dev, el_dev->name, other_dev,
1326                                  other_dev->name);
1327                 }
1328         }
1329
1330         if (unlikely(!el_dev)) {
1331                 pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
1332                        par->hooknum, __func__);
1333                 BUG();
1334         } else if (unlikely(!el_dev->name)) {
1335                 pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
1336                        par->hooknum, __func__);
1337                 BUG();
1338         } else {
1339                 proto = ipx_proto(skb, par);
1340                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1341                          par->hooknum, el_dev->name, el_dev->type,
1342                          par->family, proto);
1343         }
1344
1345         spin_lock_bh(&iface_stat_list_lock);
1346         entry = get_iface_entry(el_dev->name);
1347         if (entry == NULL) {
1348                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1349                          __func__, el_dev->name);
1350                 spin_unlock_bh(&iface_stat_list_lock);
1351                 return;
1352         }
1353
1354         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1355                  el_dev->name, entry);
1356
1357         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1358                              bytes);
1359         spin_unlock_bh(&iface_stat_list_lock);
1360 }
1361
1362 static void tag_stat_update(struct tag_stat *tag_entry,
1363                         enum ifs_tx_rx direction, int proto, int bytes)
1364 {
1365         int active_set;
1366         active_set = get_active_counter_set(tag_entry->tn.tag);
1367         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1368                  "dir=%d proto=%d bytes=%d)\n",
1369                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1370                  active_set, direction, proto, bytes);
1371         data_counters_update(&tag_entry->counters, active_set, direction,
1372                              proto, bytes);
1373         if (tag_entry->parent_counters)
1374                 data_counters_update(tag_entry->parent_counters, active_set,
1375                                      direction, proto, bytes);
1376 }
1377
1378 /*
1379  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1380  * the interface.
1381  * iface_entry->tag_stat_list_lock should be held.
1382  */
1383 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1384                                            tag_t tag)
1385 {
1386         struct tag_stat *new_tag_stat_entry = NULL;
1387         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1388                  " (uid=%u)\n", __func__,
1389                  iface_entry, tag, get_uid_from_tag(tag));
1390         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1391         if (!new_tag_stat_entry) {
1392                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1393                 goto done;
1394         }
1395         new_tag_stat_entry->tn.tag = tag;
1396         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1397 done:
1398         return new_tag_stat_entry;
1399 }
1400
1401 static void if_tag_stat_update(const char *ifname, uid_t uid,
1402                                const struct sock *sk, enum ifs_tx_rx direction,
1403                                int proto, int bytes)
1404 {
1405         struct tag_stat *tag_stat_entry;
1406         tag_t tag, acct_tag;
1407         tag_t uid_tag;
1408         struct data_counters *uid_tag_counters;
1409         struct sock_tag *sock_tag_entry;
1410         struct iface_stat *iface_entry;
1411         struct tag_stat *new_tag_stat = NULL;
1412         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1413                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1414                  ifname, uid, sk, direction, proto, bytes);
1415
1416
1417         iface_entry = get_iface_entry(ifname);
1418         if (!iface_entry) {
1419                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1420                        ifname);
1421                 return;
1422         }
1423         /* It is ok to process data when an iface_entry is inactive */
1424
1425         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1426                  ifname, iface_entry);
1427
1428         /*
1429          * Look for a tagged sock.
1430          * It will have an acct_uid.
1431          */
1432         sock_tag_entry = get_sock_stat(sk);
1433         if (sock_tag_entry) {
1434                 tag = sock_tag_entry->tag;
1435                 acct_tag = get_atag_from_tag(tag);
1436                 uid_tag = get_utag_from_tag(tag);
1437         } else {
1438                 acct_tag = make_atag_from_value(0);
1439                 tag = combine_atag_with_uid(acct_tag, uid);
1440                 uid_tag = make_tag_from_uid(uid);
1441         }
1442         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1443                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1444                  tag, get_uid_from_tag(tag), iface_entry);
1445         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1446         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1447
1448         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1449                                               tag);
1450         if (tag_stat_entry) {
1451                 /*
1452                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1453                  * {0, uid_tag} will also get updated.
1454                  */
1455                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1456                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1457                 return;
1458         }
1459
1460         /* Loop over tag list under this interface for {0,uid_tag} */
1461         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1462                                               uid_tag);
1463         if (!tag_stat_entry) {
1464                 /* Here: the base uid_tag did not exist */
1465                 /*
1466                  * No parent counters. So
1467                  *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1468                  */
1469                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1470                 if (!new_tag_stat)
1471                         goto unlock;
1472                 uid_tag_counters = &new_tag_stat->counters;
1473         } else {
1474                 uid_tag_counters = &tag_stat_entry->counters;
1475         }
1476
1477         if (acct_tag) {
1478                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1479                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1480                 if (!new_tag_stat)
1481                         goto unlock;
1482                 new_tag_stat->parent_counters = uid_tag_counters;
1483         } else {
1484                 /*
1485                  * For new_tag_stat to still be NULL here would require:
1486                  *  {0, uid_tag} exists
1487                  *  and {acct_tag, uid_tag} doesn't exist
1488                  *  AND acct_tag == 0.
1489                  * Impossible. This reassures us that new_tag_stat
1490                  * below will always be assigned.
1491                  */
1492                 BUG_ON(!new_tag_stat);
1493         }
1494         tag_stat_update(new_tag_stat, direction, proto, bytes);
1495 unlock:
1496         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1497 }
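
/*
 * Minimal sketch (not compiled into the module) of how the tag helpers
 * used above are assumed to compose a full tag: the uid in the low 32
 * bits and the accounting tag value in the high 32 bits, per
 * xt_qtaguid_internal.h.  Values are hypothetical.
 */
#if 0
static void example_tag_composition(void)
{
	uid_t uid = 10005;                           /* 0x2715 */
	tag_t acct_tag = make_atag_from_value(0x12); /* 0x0000001200000000 */
	tag_t full_tag = combine_atag_with_uid(acct_tag, uid);

	/* full_tag == 0x0000001200002715 under the assumed layout */
	WARN_ON(get_uid_from_tag(full_tag) != uid);
	WARN_ON(get_atag_from_tag(full_tag) != acct_tag);
	WARN_ON(get_utag_from_tag(full_tag) != make_tag_from_uid(uid));
}
#endif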
1498
1499 static int iface_netdev_event_handler(struct notifier_block *nb,
1500                                       unsigned long event, void *ptr) {
1501         struct net_device *dev = ptr;
1502
1503         if (unlikely(module_passive))
1504                 return NOTIFY_DONE;
1505
1506         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1507                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1508                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1509
1510         switch (event) {
1511         case NETDEV_UP:
1512                 iface_stat_create(dev, NULL);
1513                 atomic64_inc(&qtu_events.iface_events);
1514                 break;
1515         case NETDEV_DOWN:
1516         case NETDEV_UNREGISTER:
1517                 iface_stat_update(dev, event == NETDEV_DOWN);
1518                 atomic64_inc(&qtu_events.iface_events);
1519                 break;
1520         }
1521         return NOTIFY_DONE;
1522 }
1523
1524 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1525                                          unsigned long event, void *ptr)
1526 {
1527         struct inet6_ifaddr *ifa = ptr;
1528         struct net_device *dev;
1529
1530         if (unlikely(module_passive))
1531                 return NOTIFY_DONE;
1532
1533         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1534                  "ev=0x%lx/%s ifa=%p\n",
1535                  event, netdev_evt_str(event), ifa);
1536
1537         switch (event) {
1538         case NETDEV_UP:
1539                 BUG_ON(!ifa || !ifa->idev);
1540                 dev = (struct net_device *)ifa->idev->dev;
1541                 iface_stat_create_ipv6(dev, ifa);
1542                 atomic64_inc(&qtu_events.iface_events);
1543                 break;
1544         case NETDEV_DOWN:
1545         case NETDEV_UNREGISTER:
1546                 BUG_ON(!ifa || !ifa->idev);
1547                 dev = (struct net_device *)ifa->idev->dev;
1548                 iface_stat_update(dev, event == NETDEV_DOWN);
1549                 atomic64_inc(&qtu_events.iface_events);
1550                 break;
1551         }
1552         return NOTIFY_DONE;
1553 }
1554
1555 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1556                                         unsigned long event, void *ptr)
1557 {
1558         struct in_ifaddr *ifa = ptr;
1559         struct net_device *dev;
1560
1561         if (unlikely(module_passive))
1562                 return NOTIFY_DONE;
1563
1564         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1565                  "ev=0x%lx/%s ifa=%p\n",
1566                  event, netdev_evt_str(event), ifa);
1567
1568         switch (event) {
1569         case NETDEV_UP:
1570                 BUG_ON(!ifa || !ifa->ifa_dev);
1571                 dev = ifa->ifa_dev->dev;
1572                 iface_stat_create(dev, ifa);
1573                 atomic64_inc(&qtu_events.iface_events);
1574                 break;
1575         case NETDEV_DOWN:
1576         case NETDEV_UNREGISTER:
1577                 BUG_ON(!ifa || !ifa->ifa_dev);
1578                 dev = ifa->ifa_dev->dev;
1579                 iface_stat_update(dev, event == NETDEV_DOWN);
1580                 atomic64_inc(&qtu_events.iface_events);
1581                 break;
1582         }
1583         return NOTIFY_DONE;
1584 }
1585
1586 static struct notifier_block iface_netdev_notifier_blk = {
1587         .notifier_call = iface_netdev_event_handler,
1588 };
1589
1590 static struct notifier_block iface_inetaddr_notifier_blk = {
1591         .notifier_call = iface_inetaddr_event_handler,
1592 };
1593
1594 static struct notifier_block iface_inet6addr_notifier_blk = {
1595         .notifier_call = iface_inet6addr_event_handler,
1596 };
1597
1598 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1599 {
1600         int err;
1601
1602         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1603         if (!iface_stat_procdir) {
1604                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1605                 err = -1;
1606                 goto err;
1607         }
1608
1609         iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1610                                                     proc_iface_perms,
1611                                                     parent_procdir);
1612         if (!iface_stat_all_procfile) {
1613                 pr_err("qtaguid: iface_stat: init "
1614                        " failed to create stat_old proc entry\n");
1615                 err = -1;
1616                 goto err_zap_entry;
1617         }
1618         iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
1619         iface_stat_all_procfile->data = (void *)1; /* fmt1 */
1620
1621         iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
1622                                                     proc_iface_perms,
1623                                                     parent_procdir);
1624         if (!iface_stat_fmt_procfile) {
1625                 pr_err("qtaguid: iface_stat: init "
1626                        " failed to create stat_all proc entry\n");
1627                 err = -1;
1628                 goto err_zap_all_stats_entry;
1629         }
1630         iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
1631         iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
1632
1633
1634         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1635         if (err) {
1636                 pr_err("qtaguid: iface_stat: init "
1637                        "failed to register dev event handler\n");
1638                 goto err_zap_all_stats_entries;
1639         }
1640         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1641         if (err) {
1642                 pr_err("qtaguid: iface_stat: init "
1643                        "failed to register ipv4 dev event handler\n");
1644                 goto err_unreg_nd;
1645         }
1646
1647         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1648         if (err) {
1649                 pr_err("qtaguid: iface_stat: init "
1650                        "failed to register ipv6 dev event handler\n");
1651                 goto err_unreg_ip4_addr;
1652         }
1653         return 0;
1654
1655 err_unreg_ip4_addr:
1656         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1657 err_unreg_nd:
1658         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1659 err_zap_all_stats_entries:
1660         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1661 err_zap_all_stats_entry:
1662         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1663 err_zap_entry:
1664         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1665 err:
1666         return err;
1667 }
1668
1669 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1670                                     struct xt_action_param *par)
1671 {
1672         struct sock *sk;
1673         unsigned int hook_mask = (1 << par->hooknum);
1674
1675         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1676                  par->hooknum, par->family);
1677
1678         /*
1679          * Let's not abuse the xt_socket_get*_sk(), or else it will
1680          * return garbage SKs.
1681          */
1682         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1683                 return NULL;
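	/*
	 * Example (illustrative, assuming the usual NF_INET_* numbering
	 * with PRE_ROUTING=0 and LOCAL_IN=1): XT_SOCKET_SUPPORTED_HOOKS
	 * is then 0x3, so a packet seen at NF_INET_LOCAL_OUT
	 * (hook_mask == 1 << 3 == 0x8) returns NULL above and no socket
	 * lookup is attempted.
	 */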
1684
1685         switch (par->family) {
1686         case NFPROTO_IPV6:
1687                 sk = xt_socket_get6_sk(skb, par);
1688                 break;
1689         case NFPROTO_IPV4:
1690                 sk = xt_socket_get4_sk(skb, par);
1691                 break;
1692         default:
1693                 return NULL;
1694         }
1695
1696         /*
1697          * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1698          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1699          * Not fixed in 3.0-r3 :(
1700          */
1701         if (sk) {
1702                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1703                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1704                 if (sk->sk_state  == TCP_TIME_WAIT) {
1705                         xt_socket_put_sk(sk);
1706                         sk = NULL;
1707                 }
1708         }
1709         return sk;
1710 }
1711
1712 static void account_for_uid(const struct sk_buff *skb,
1713                             const struct sock *alternate_sk, uid_t uid,
1714                             struct xt_action_param *par)
1715 {
1716         const struct net_device *el_dev;
1717
1718         if (!skb->dev) {
1719                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1720                 el_dev = par->in ? : par->out;
1721         } else {
1722                 const struct net_device *other_dev;
1723                 el_dev = skb->dev;
1724                 other_dev = par->in ? : par->out;
1725                 if (el_dev != other_dev) {
1726                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1727                                 "par->(in/out)=%p %s\n",
1728                                 par->hooknum, el_dev, el_dev->name, other_dev,
1729                                 other_dev->name);
1730                 }
1731         }
1732
1733         if (unlikely(!el_dev)) {
1734                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1735         } else if (unlikely(!el_dev->name)) {
1736                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1737         } else {
1738                 int proto = ipx_proto(skb, par);
1739                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1740                          par->hooknum, el_dev->name, el_dev->type,
1741                          par->family, proto);
1742
1743                 if_tag_stat_update(el_dev->name, uid,
1744                                 skb->sk ? skb->sk : alternate_sk,
1745                                 par->in ? IFS_RX : IFS_TX,
1746                                 proto, skb->len);
1747         }
1748 }
1749
1750 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1751 {
1752         const struct xt_qtaguid_match_info *info = par->matchinfo;
1753         const struct file *filp;
1754         bool got_sock = false;
1755         struct sock *sk;
1756         uid_t sock_uid;
1757         bool res;
1758
1759         if (unlikely(module_passive))
1760                 return (info->match ^ info->invert) == 0;
1761
1762         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1763                  par->hooknum, skb, par->in, par->out, par->family);
1764
1765         atomic64_inc(&qtu_events.match_calls);
1766         if (skb == NULL) {
1767                 res = (info->match ^ info->invert) == 0;
1768                 goto ret_res;
1769         }
1770
1771         switch (par->hooknum) {
1772         case NF_INET_PRE_ROUTING:
1773         case NF_INET_POST_ROUTING:
1774                 atomic64_inc(&qtu_events.match_calls_prepost);
1775                 iface_stat_update_from_skb(skb, par);
1776                 /*
1777                  * We are done in pre/post. The skb will get processed
1778                  * further, later.
1779                  */
1780                 res = (info->match ^ info->invert);
1781                 goto ret_res;
1782                 break;
1783         /* default: Fall through and do UID related work */
1784         }
1785
1786         sk = skb->sk;
1787         if (sk == NULL) {
1788                 /*
1789                  * A missing sk->sk_socket happens when packets are in-flight
1790                  * and the matching socket is already closed and gone.
1791                  */
1792                 sk = qtaguid_find_sk(skb, par);
1793                 /*
1794                  * If we got the socket from the find_sk(), we will need to put
1795                  * it back, as nf_tproxy_get_sock_v4() got it.
1796                  */
1797                 got_sock = sk;
1798                 if (sk)
1799                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1800                 else
1801                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1802         } else {
1803                 atomic64_inc(&qtu_events.match_found_sk);
1804         }
1805         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1806                  par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1807         if (sk != NULL) {
1808                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1809                         par->hooknum, sk, sk->sk_socket,
1810                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1811                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1812                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1813                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1814         }
1815
1816         if (sk == NULL || sk->sk_socket == NULL) {
1817                 /*
1818                  * Here, the qtaguid_find_sk() using connection tracking
1819                  * couldn't find the owner, so for now we just count them
1820                  * against the system.
1821                  */
1822                 /*
1823                  * TODO: unhack how to force just accounting.
1824                  * For now we only do iface stats when the uid-owner is not
1825                  * requested.
1826                  */
1827                 if (!(info->match & XT_QTAGUID_UID))
1828                         account_for_uid(skb, sk, 0, par);
1829                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1830                         par->hooknum,
1831                         sk ? sk->sk_socket : NULL);
1832                 res = (info->match ^ info->invert) == 0;
1833                 atomic64_inc(&qtu_events.match_no_sk);
1834                 goto put_sock_ret_res;
1835         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1836                 res = false;
1837                 goto put_sock_ret_res;
1838         }
1839         filp = sk->sk_socket->file;
1840         if (filp == NULL) {
1841                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1842                 account_for_uid(skb, sk, 0, par);
1843                 res = ((info->match ^ info->invert) &
1844                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1845                 atomic64_inc(&qtu_events.match_no_sk_file);
1846                 goto put_sock_ret_res;
1847         }
1848         sock_uid = filp->f_cred->fsuid;
1849         /*
1850          * TODO: unhack how to force just accounting.
1851          * For now we only do iface stats when the uid-owner is not requested
1852          */
1853         if (!(info->match & XT_QTAGUID_UID))
1854                 account_for_uid(skb, sk, sock_uid, par);
1855
1856         /*
1857          * The following two tests fail the match when:
1858          *    id not in range AND no inverted condition requested
1859          * or id     in range AND    inverted condition requested
1860          * Thus (!a && b) || (a && !b) == a ^ b
1861          */
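	/*
	 * Concrete example (hypothetical values): with a rule matching
	 * uid 10005 and no inversion, a socket owned by uid 10005 gives
	 * (in_range=1) ^ !(inverted=0) == 1 ^ 1 == 0, so the test below
	 * does not fire and the match proceeds; any other uid gives
	 * 0 ^ 1 == 1 and the match fails.
	 */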
1862         if (info->match & XT_QTAGUID_UID)
1863                 if ((filp->f_cred->fsuid >= info->uid_min &&
1864                      filp->f_cred->fsuid <= info->uid_max) ^
1865                     !(info->invert & XT_QTAGUID_UID)) {
1866                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1867                                  par->hooknum);
1868                         res = false;
1869                         goto put_sock_ret_res;
1870                 }
1871         if (info->match & XT_QTAGUID_GID)
1872                 if ((filp->f_cred->fsgid >= info->gid_min &&
1873                                 filp->f_cred->fsgid <= info->gid_max) ^
1874                         !(info->invert & XT_QTAGUID_GID)) {
1875                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1876                                 par->hooknum);
1877                         res = false;
1878                         goto put_sock_ret_res;
1879                 }
1880
1881         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1882         res = true;
1883
1884 put_sock_ret_res:
1885         if (got_sock)
1886                 xt_socket_put_sk(sk);
1887 ret_res:
1888         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1889         return res;
1890 }
1891
1892 #ifdef DDEBUG
1893 /* This function is not in xt_qtaguid_print.c because of locks visibility */
1894 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1895 {
1896         va_list args;
1897         char *fmt_buff;
1898         char *buff;
1899
1900         if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1901                 return;
1902
1903         fmt_buff = kasprintf(GFP_ATOMIC,
1904                              "qtaguid: %s(): %s {\n", __func__, fmt);
1905         BUG_ON(!fmt_buff);
1906         va_start(args, fmt);
1907         buff = kvasprintf(GFP_ATOMIC,
1908                           fmt_buff, args);
1909         BUG_ON(!buff);
1910         pr_debug("%s", buff);
1911         kfree(fmt_buff);
1912         kfree(buff);
1913         va_end(args);
1914
1915         spin_lock_bh(&sock_tag_list_lock);
1916         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1917         spin_unlock_bh(&sock_tag_list_lock);
1918
1919         spin_lock_bh(&sock_tag_list_lock);
1920         spin_lock_bh(&uid_tag_data_tree_lock);
1921         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1922         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1923         spin_unlock_bh(&uid_tag_data_tree_lock);
1924         spin_unlock_bh(&sock_tag_list_lock);
1925
1926         spin_lock_bh(&iface_stat_list_lock);
1927         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1928         spin_unlock_bh(&iface_stat_list_lock);
1929
1930         pr_debug("qtaguid: %s(): }\n", __func__);
1931 }
1932 #else
1933 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1934 #endif
1935
1936 /*
1937  * Procfs reader to get all active socket tags using style "1)" as described in
1938  * fs/proc/generic.c
1939  */
1940 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1941                                   off_t items_to_skip, int char_count, int *eof,
1942                                   void *data)
1943 {
1944         char *outp = page;
1945         int len;
1946         uid_t uid;
1947         struct rb_node *node;
1948         struct sock_tag *sock_tag_entry;
1949         int item_index = 0;
1950         int indent_level = 0;
1951         long f_count;
1952
1953         if (unlikely(module_passive)) {
1954                 *eof = 1;
1955                 return 0;
1956         }
1957
1958         if (*eof)
1959                 return 0;
1960
1961         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
1962                  "page=%p off=%ld char_count=%d *eof=%d\n",
1963                  current->pid, current->tgid, current_fsuid(),
1964                  page, items_to_skip, char_count, *eof);
1965
1966         spin_lock_bh(&sock_tag_list_lock);
1967         for (node = rb_first(&sock_tag_tree);
1968              node;
1969              node = rb_next(node)) {
1970                 if (item_index++ < items_to_skip)
1971                         continue;
1972                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1973                 uid = get_uid_from_tag(sock_tag_entry->tag);
1974                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1975                          "pid=%u\n",
1976                          sock_tag_entry->sk,
1977                          sock_tag_entry->tag,
1978                          uid,
1979                          sock_tag_entry->pid
1980                         );
1981                 f_count = atomic_long_read(
1982                         &sock_tag_entry->socket->file->f_count);
1983                 len = snprintf(outp, char_count,
1984                                "sock=%p tag=0x%llx (uid=%u) pid=%u "
1985                                "f_count=%lu\n",
1986                                sock_tag_entry->sk,
1987                                sock_tag_entry->tag, uid,
1988                                sock_tag_entry->pid, f_count);
1989                 if (len >= char_count) {
1990                         spin_unlock_bh(&sock_tag_list_lock);
1991                         *outp = '\0';
1992                         return outp - page;
1993                 }
1994                 outp += len;
1995                 char_count -= len;
1996                 (*num_items_returned)++;
1997         }
1998         spin_unlock_bh(&sock_tag_list_lock);
1999
2000         if (item_index++ >= items_to_skip) {
2001                 len = snprintf(outp, char_count,
2002                                "events: sockets_tagged=%llu "
2003                                "sockets_untagged=%llu "
2004                                "counter_set_changes=%llu "
2005                                "delete_cmds=%llu "
2006                                "iface_events=%llu "
2007                                "match_calls=%llu "
2008                                "match_calls_prepost=%llu "
2009                                "match_found_sk=%llu "
2010                                "match_found_sk_in_ct=%llu "
2011                                "match_found_no_sk_in_ct=%llu "
2012                                "match_no_sk=%llu "
2013                                "match_no_sk_file=%llu\n",
2014                                atomic64_read(&qtu_events.sockets_tagged),
2015                                atomic64_read(&qtu_events.sockets_untagged),
2016                                atomic64_read(&qtu_events.counter_set_changes),
2017                                atomic64_read(&qtu_events.delete_cmds),
2018                                atomic64_read(&qtu_events.iface_events),
2019                                atomic64_read(&qtu_events.match_calls),
2020                                atomic64_read(&qtu_events.match_calls_prepost),
2021                                atomic64_read(&qtu_events.match_found_sk),
2022                                atomic64_read(&qtu_events.match_found_sk_in_ct),
2023                                atomic64_read(
2024                                        &qtu_events.match_found_no_sk_in_ct),
2025                                atomic64_read(&qtu_events.match_no_sk),
2026                                atomic64_read(&qtu_events.match_no_sk_file));
2027                 if (len >= char_count) {
2028                         *outp = '\0';
2029                         return outp - page;
2030                 }
2031                 outp += len;
2032                 char_count -= len;
2033                 (*num_items_returned)++;
2034         }
2035
2036         /* Count the following as part of the last item_index */
2037         if (item_index > items_to_skip) {
2038                 prdebug_full_state(indent_level, "proc ctrl");
2039         }
2040
2041         *eof = 1;
2042         return outp - page;
2043 }
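
/*
 * Pagination note (rough sketch of the "1)" read_proc style used above):
 * the reader's offset counts items rather than bytes, so each call skips
 * the first items_to_skip entries and advances the offset by whatever it
 * adds to *num_items_returned.  A reader that needs several read() calls
 * to drain the file should therefore not see an entry twice.
 */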
2044
2045 /*
2046  * Delete socket tags, and stat tags associated with a given
2047  * accounting tag and uid.
2048  */
2049 static int ctrl_cmd_delete(const char *input)
2050 {
2051         char cmd;
2052         uid_t uid;
2053         uid_t entry_uid;
2054         tag_t acct_tag;
2055         tag_t tag;
2056         int res, argc;
2057         struct iface_stat *iface_entry;
2058         struct rb_node *node;
2059         struct sock_tag *st_entry;
2060         struct rb_root st_to_free_tree = RB_ROOT;
2061         struct tag_stat *ts_entry;
2062         struct tag_counter_set *tcs_entry;
2063         struct tag_ref *tr_entry;
2064         struct uid_tag_data *utd_entry;
2065
2066         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
2067         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2068                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2069                  acct_tag, uid);
2070         if (argc < 2) {
2071                 res = -EINVAL;
2072                 goto err;
2073         }
2074         if (!valid_atag(acct_tag)) {
2075                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2076                 res = -EINVAL;
2077                 goto err;
2078         }
2079         if (argc < 3) {
2080                 uid = current_fsuid();
2081         } else if (!can_impersonate_uid(uid)) {
2082                 pr_info("qtaguid: ctrl_delete(%s): "
2083                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2084                         input, current->pid, current->tgid, current_fsuid());
2085                 res = -EPERM;
2086                 goto err;
2087         }
2088
2089         tag = combine_atag_with_uid(acct_tag, uid);
2090         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2091                  "looking for tag=0x%llx (uid=%u)\n",
2092                  input, tag, uid);
2093
2094         /* Delete socket tags */
2095         spin_lock_bh(&sock_tag_list_lock);
2096         node = rb_first(&sock_tag_tree);
2097         while (node) {
2098                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2099                 entry_uid = get_uid_from_tag(st_entry->tag);
2100                 node = rb_next(node);
2101                 if (entry_uid != uid)
2102                         continue;
2103
2104                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2105                          input, st_entry->tag, entry_uid);
2106
2107                 if (!acct_tag || st_entry->tag == tag) {
2108                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2109                         /* Can't sockfd_put() within spinlock, do it later. */
2110                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2111                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2112                         BUG_ON(tr_entry->num_sock_tags <= 0);
2113                         tr_entry->num_sock_tags--;
2114                         /*
2115                          * This is a hack: in some places we have
2116                          * "if (IS_ERR_OR_NULL(pqd_entry))" to work around
2117                          * apps that didn't open /dev/xt_qtaguid and thus
2118                          * have no proc_qtu_data entry.
2119                          * that didn't open the /dev/xt_qtaguid.
2120                          */
2121                         if (st_entry->list.next && st_entry->list.prev)
2122                                 list_del(&st_entry->list);
2123                 }
2124         }
2125         spin_unlock_bh(&sock_tag_list_lock);
2126
2127         sock_tag_tree_erase(&st_to_free_tree);
2128
2129         /* Delete tag counter-sets */
2130         spin_lock_bh(&tag_counter_set_list_lock);
2131         /* Counter sets are only on the uid tag, not full tag */
2132         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2133         if (tcs_entry) {
2134                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2135                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2136                          input,
2137                          tcs_entry->tn.tag,
2138                          get_uid_from_tag(tcs_entry->tn.tag),
2139                          tcs_entry->active_set);
2140                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2141                 kfree(tcs_entry);
2142         }
2143         spin_unlock_bh(&tag_counter_set_list_lock);
2144
2145         /*
2146          * If acct_tag is 0, then all entries belonging to uid are
2147          * erased.
2148          */
2149         spin_lock_bh(&iface_stat_list_lock);
2150         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2151                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2152                 node = rb_first(&iface_entry->tag_stat_tree);
2153                 while (node) {
2154                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2155                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2156                         node = rb_next(node);
2157
2158                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2159                                  "ts tag=0x%llx (uid=%u)\n",
2160                                  input, ts_entry->tn.tag, entry_uid);
2161
2162                         if (entry_uid != uid)
2163                                 continue;
2164                         if (!acct_tag || ts_entry->tn.tag == tag) {
2165                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2166                                          "erase ts: %s 0x%llx %u\n",
2167                                          input, iface_entry->ifname,
2168                                          get_atag_from_tag(ts_entry->tn.tag),
2169                                          entry_uid);
2170                                 rb_erase(&ts_entry->tn.node,
2171                                          &iface_entry->tag_stat_tree);
2172                                 kfree(ts_entry);
2173                         }
2174                 }
2175                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2176         }
2177         spin_unlock_bh(&iface_stat_list_lock);
2178
2179         /* Cleanup the uid_tag_data */
2180         spin_lock_bh(&uid_tag_data_tree_lock);
2181         node = rb_first(&uid_tag_data_tree);
2182         while (node) {
2183                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2184                 entry_uid = utd_entry->uid;
2185                 node = rb_next(node);
2186
2187                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2188                          "utd uid=%u\n",
2189                          input, entry_uid);
2190
2191                 if (entry_uid != uid)
2192                         continue;
2193                 /*
2194                  * Go over the tag_refs and free those that no longer
2195                  * have sock_tags using them.
2196                  */
2197                 put_tag_ref_tree(tag, utd_entry);
2198                 put_utd_entry(utd_entry);
2199         }
2200         spin_unlock_bh(&uid_tag_data_tree_lock);
2201
2202         atomic64_inc(&qtu_events.delete_cmds);
2203         res = 0;
2204
2205 err:
2206         return res;
2207 }
2208
2209 static int ctrl_cmd_counter_set(const char *input)
2210 {
2211         char cmd;
2212         uid_t uid = 0;
2213         tag_t tag;
2214         int res, argc;
2215         struct tag_counter_set *tcs;
2216         int counter_set;
2217
2218         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2219         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2220                  "set=%d uid=%u\n", input, argc, cmd,
2221                  counter_set, uid);
2222         if (argc != 3) {
2223                 res = -EINVAL;
2224                 goto err;
2225         }
2226         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2227                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2228                         input);
2229                 res = -EINVAL;
2230                 goto err;
2231         }
2232         if (!can_manipulate_uids()) {
2233                 pr_info("qtaguid: ctrl_counterset(%s): "
2234                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2235                         input, current->pid, current->tgid, current_fsuid());
2236                 res = -EPERM;
2237                 goto err;
2238         }
2239
2240         tag = make_tag_from_uid(uid);
2241         spin_lock_bh(&tag_counter_set_list_lock);
2242         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2243         if (!tcs) {
2244                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2245                 if (!tcs) {
2246                         spin_unlock_bh(&tag_counter_set_list_lock);
2247                         pr_err("qtaguid: ctrl_counterset(%s): "
2248                                "failed to alloc counter set\n",
2249                                input);
2250                         res = -ENOMEM;
2251                         goto err;
2252                 }
2253                 tcs->tn.tag = tag;
2254                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2255                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2256                          "(uid=%u) set=%d\n",
2257                          input, tag, get_uid_from_tag(tag), counter_set);
2258         }
2259         tcs->active_set = counter_set;
2260         spin_unlock_bh(&tag_counter_set_list_lock);
2261         atomic64_inc(&qtu_events.counter_set_changes);
2262         res = 0;
2263
2264 err:
2265         return res;
2266 }
2267
2268 static int ctrl_cmd_tag(const char *input)
2269 {
2270         char cmd;
2271         int sock_fd = 0;
2272         uid_t uid = 0;
2273         tag_t acct_tag = make_atag_from_value(0);
2274         tag_t full_tag;
2275         struct socket *el_socket;
2276         int res, argc;
2277         struct sock_tag *sock_tag_entry;
2278         struct tag_ref *tag_ref_entry;
2279         struct uid_tag_data *uid_tag_data_entry;
2280         struct proc_qtu_data *pqd_entry;
2281
2282         /* Unassigned args will get defaulted later. */
2283         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2284         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2285                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2286                  acct_tag, uid);
2287         if (argc < 2) {
2288                 res = -EINVAL;
2289                 goto err;
2290         }
2291         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2292         if (!el_socket) {
2293                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2294                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2295                         input, sock_fd, res, current->pid, current->tgid,
2296                         current_fsuid());
2297                 goto err;
2298         }
2299         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2300                  input, atomic_long_read(&el_socket->file->f_count),
2301                  el_socket->sk);
2302         if (argc < 3) {
2303                 acct_tag = make_atag_from_value(0);
2304         } else if (!valid_atag(acct_tag)) {
2305                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2306                 res = -EINVAL;
2307                 goto err_put;
2308         }
2309         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2310                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2311                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2312                  input, current->pid, current->tgid, current_uid(),
2313                  current_euid(), current_fsuid(),
2314                  xt_qtaguid_ctrl_file->gid,
2315                  in_group_p(xt_qtaguid_ctrl_file->gid),
2316                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2317         if (argc < 4) {
2318                 uid = current_fsuid();
2319         } else if (!can_impersonate_uid(uid)) {
2320                 pr_info("qtaguid: ctrl_tag(%s): "
2321                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2322                         input, current->pid, current->tgid, current_fsuid());
2323                 res = -EPERM;
2324                 goto err_put;
2325         }
2326         full_tag = combine_atag_with_uid(acct_tag, uid);
2327
2328         spin_lock_bh(&sock_tag_list_lock);
2329         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2330         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2331         if (IS_ERR(tag_ref_entry)) {
2332                 res = PTR_ERR(tag_ref_entry);
2333                 spin_unlock_bh(&sock_tag_list_lock);
2334                 goto err_put;
2335         }
2336         tag_ref_entry->num_sock_tags++;
2337         if (sock_tag_entry) {
2338                 struct tag_ref *prev_tag_ref_entry;
2339
2340                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2341                          "st@%p ...->f_count=%ld\n",
2342                          input, el_socket->sk, sock_tag_entry,
2343                          atomic_long_read(&el_socket->file->f_count));
2344                 /*
2345                  * This is a re-tagging, so release the sock_fd that was
2346                  * locked at the time of the 1st tagging.
2347                  * There is still the ref from this call's sockfd_lookup() so
2348                  * it can be done within the spinlock.
2349                  */
2350                 sockfd_put(sock_tag_entry->socket);
2351                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2352                                                     &uid_tag_data_entry);
2353                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2354                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2355                 prev_tag_ref_entry->num_sock_tags--;
2356                 sock_tag_entry->tag = full_tag;
2357         } else {
2358                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2359                          input, el_socket->sk);
2360                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2361                                          GFP_ATOMIC);
2362                 if (!sock_tag_entry) {
2363                         pr_err("qtaguid: ctrl_tag(%s): "
2364                                "socket tag alloc failed\n",
2365                                input);
2366                         spin_unlock_bh(&sock_tag_list_lock);
2367                         res = -ENOMEM;
2368                         goto err_tag_unref_put;
2369                 }
2370                 sock_tag_entry->sk = el_socket->sk;
2371                 sock_tag_entry->socket = el_socket;
2372                 sock_tag_entry->pid = current->tgid;
2373                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2374                                                             uid);
2375                 spin_lock_bh(&uid_tag_data_tree_lock);
2376                 pqd_entry = proc_qtu_data_tree_search(
2377                         &proc_qtu_data_tree, current->tgid);
2378                 /*
2379                  * TODO: remove if, and start failing.
2380                  * At first, we want to catch user-space code that is not
2381                  * opening the /dev/xt_qtaguid.
2382                  */
2383                 if (IS_ERR_OR_NULL(pqd_entry))
2384                         pr_warn_once(
2385                                 "qtaguid: %s(): "
2386                                 "User space forgot to open /dev/xt_qtaguid? "
2387                                 "pid=%u tgid=%u uid=%u\n", __func__,
2388                                 current->pid, current->tgid,
2389                                 current_fsuid());
2390                 else
2391                         list_add(&sock_tag_entry->list,
2392                                  &pqd_entry->sock_tag_list);
2393                 spin_unlock_bh(&uid_tag_data_tree_lock);
2394
2395                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2396                 atomic64_inc(&qtu_events.sockets_tagged);
2397         }
2398         spin_unlock_bh(&sock_tag_list_lock);
2399         /* We keep the ref to the socket (file) until it is untagged */
2400         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2401                  input, sock_tag_entry,
2402                  atomic_long_read(&el_socket->file->f_count));
2403         return 0;
2404
2405 err_tag_unref_put:
2406         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2407         tag_ref_entry->num_sock_tags--;
2408         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2409 err_put:
2410         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2411                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2412         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2413         sockfd_put(el_socket);
2414         return res;
2415
2416 err:
2417         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2418         return res;
2419 }
2420
2421 static int ctrl_cmd_untag(const char *input)
2422 {
2423         char cmd;
2424         int sock_fd = 0;
2425         struct socket *el_socket;
2426         int res, argc;
2427         struct sock_tag *sock_tag_entry;
2428         struct tag_ref *tag_ref_entry;
2429         struct uid_tag_data *utd_entry;
2430         struct proc_qtu_data *pqd_entry;
2431
2432         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2433         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2434                  input, argc, cmd, sock_fd);
2435         if (argc < 2) {
2436                 res = -EINVAL;
2437                 goto err;
2438         }
2439         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2440         if (!el_socket) {
2441                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2442                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2443                         input, sock_fd, res, current->pid, current->tgid,
2444                         current_fsuid());
2445                 goto err;
2446         }
2447         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2448                  input, atomic_long_read(&el_socket->file->f_count),
2449                  el_socket->sk);
2450         spin_lock_bh(&sock_tag_list_lock);
2451         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2452         if (!sock_tag_entry) {
2453                 spin_unlock_bh(&sock_tag_list_lock);
2454                 res = -EINVAL;
2455                 goto err_put;
2456         }
2457         /*
2458          * The socket already belongs to the current process
2459          * so it can do whatever it wants to it.
2460          */
2461         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2462
2463         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2464         BUG_ON(!tag_ref_entry);
2465         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2466         spin_lock_bh(&uid_tag_data_tree_lock);
2467         pqd_entry = proc_qtu_data_tree_search(
2468                 &proc_qtu_data_tree, current->tgid);
2469         /*
2470          * TODO: remove if, and start failing.
2471          * At first, we want to catch user-space code that is not
2472          * opening the /dev/xt_qtaguid.
2473          */
2474         if (IS_ERR_OR_NULL(pqd_entry))
2475                 pr_warn_once("qtaguid: %s(): "
2476                              "User space forgot to open /dev/xt_qtaguid? "
2477                              "pid=%u tgid=%u uid=%u\n", __func__,
2478                              current->pid, current->tgid, current_fsuid());
2479         else
2480                 list_del(&sock_tag_entry->list);
2481         spin_unlock_bh(&uid_tag_data_tree_lock);
2482         /*
2483          * We don't free tag_ref from the utd_entry here,
2484          * only during a cmd_delete().
2485          */
2486         tag_ref_entry->num_sock_tags--;
2487         spin_unlock_bh(&sock_tag_list_lock);
2488         /*
2489          * Release the sock_fd that was grabbed at tag time,
2490          * and once more for the sockfd_lookup() here.
2491          */
2492         sockfd_put(sock_tag_entry->socket);
2493         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2494                  input, sock_tag_entry,
2495                  atomic_long_read(&el_socket->file->f_count) - 1);
2496         sockfd_put(el_socket);
2497
2498         kfree(sock_tag_entry);
2499         atomic64_inc(&qtu_events.sockets_untagged);
2500
2501         return 0;
2502
2503 err_put:
2504         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2505                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2506         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2507         sockfd_put(el_socket);
2508         return res;
2509
2510 err:
2511         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2512         return res;
2513 }
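
/*
 * Refcount sketch (illustrative): ctrl_cmd_tag() keeps the reference
 * taken by its sockfd_lookup() until the socket is untagged, so after a
 * successful "t" the file's f_count is one higher.  ctrl_cmd_untag()
 * above then drops that kept reference plus the one taken by its own
 * sockfd_lookup(), making a tag/untag pair neutral overall.
 */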
2514
2515 static int qtaguid_ctrl_parse(const char *input, int count)
2516 {
2517         char cmd;
2518         int res;
2519
2520         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2521                  input, current->pid, current->tgid, current_fsuid());
2522
2523         cmd = input[0];
2524         /* Collect params for commands */
2525         switch (cmd) {
2526         case 'd':
2527                 res = ctrl_cmd_delete(input);
2528                 break;
2529
2530         case 's':
2531                 res = ctrl_cmd_counter_set(input);
2532                 break;
2533
2534         case 't':
2535                 res = ctrl_cmd_tag(input);
2536                 break;
2537
2538         case 'u':
2539                 res = ctrl_cmd_untag(input);
2540                 break;
2541
2542         default:
2543                 res = -EINVAL;
2544                 goto err;
2545         }
2546         if (!res)
2547                 res = count;
2548 err:
2549         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2550         return res;
2551 }
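
/*
 * Usage sketch (illustrative; assumes the ctrl file ends up at
 * /proc/net/xt_qtaguid/ctrl and all values are hypothetical):
 *
 *   # tag socket fd 7 for uid 10005 with acct tag value 1; the tag is
 *   # passed as the decimal of (1 << 32) since valid_atag() requires
 *   # the low 32 bits to be clear:
 *   echo "t 7 4294967296 10005" > /proc/net/xt_qtaguid/ctrl
 *   echo "u 7" > /proc/net/xt_qtaguid/ctrl          # untag socket fd 7
 *   echo "s 1 10005" > /proc/net/xt_qtaguid/ctrl    # switch uid 10005 to counter set 1
 *   echo "d 4294967296 10005" > /proc/net/xt_qtaguid/ctrl  # delete that {acct_tag, uid}
 *   echo "d 0 10005" > /proc/net/xt_qtaguid/ctrl    # delete everything for uid 10005
 *
 * Specifying a uid other than the caller's requires privilege; see
 * can_impersonate_uid() and can_manipulate_uids().
 */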
2552
2553 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2554 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2555                         unsigned long count, void *data)
2556 {
2557         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2558
2559         if (unlikely(module_passive))
2560                 return count;
2561
2562         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2563                 return -EINVAL;
2564
2565         if (copy_from_user(input_buf, buffer, count))
2566                 return -EFAULT;
2567
2568         input_buf[count] = '\0';
2569         return qtaguid_ctrl_parse(input_buf, count);
2570 }
2571
2572 struct proc_print_info {
2573         char *outp;
2574         char **num_items_returned;
2575         struct iface_stat *iface_entry;
2576         struct tag_stat *ts_entry;
2577         int item_index;
2578         int items_to_skip;
2579         int char_count;
2580 };
2581
2582 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2583 {
2584         int len;
2585         struct data_counters *cnts;
2586
2587         if (!ppi->item_index) {
2588                 if (ppi->item_index++ < ppi->items_to_skip)
2589                         return 0;
2590                 len = snprintf(ppi->outp, ppi->char_count,
2591                                "idx iface acct_tag_hex uid_tag_int cnt_set "
2592                                "rx_bytes rx_packets "
2593                                "tx_bytes tx_packets "
2594                                "rx_tcp_bytes rx_tcp_packets "
2595                                "rx_udp_bytes rx_udp_packets "
2596                                "rx_other_bytes rx_other_packets "
2597                                "tx_tcp_bytes tx_tcp_packets "
2598                                "tx_udp_bytes tx_udp_packets "
2599                                "tx_other_bytes tx_other_packets\n");
2600         } else {
2601                 tag_t tag = ppi->ts_entry->tn.tag;
2602                 uid_t stat_uid = get_uid_from_tag(tag);
2603                 /* Detailed tags are not available to everybody */
2604                 if (get_atag_from_tag(tag)
2605                     && !can_read_other_uid_stats(stat_uid)) {
2606                         CT_DEBUG("qtaguid: stats line: "
2607                                  "%s 0x%llx %u: insufficient priv "
2608                                  "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2609                                  ppi->iface_entry->ifname,
2610                                  get_atag_from_tag(tag), stat_uid,
2611                                  current->pid, current->tgid, current_fsuid(),
2612                                  xt_qtaguid_stats_file->gid);
2613                         return 0;
2614                 }
2615                 if (ppi->item_index++ < ppi->items_to_skip)
2616                         return 0;
2617                 cnts = &ppi->ts_entry->counters;
2618                 len = snprintf(
2619                         ppi->outp, ppi->char_count,
2620                         "%d %s 0x%llx %u %u "
2621                         "%llu %llu "
2622                         "%llu %llu "
2623                         "%llu %llu "
2624                         "%llu %llu "
2625                         "%llu %llu "
2626                         "%llu %llu "
2627                         "%llu %llu "
2628                         "%llu %llu\n",
2629                         ppi->item_index,
2630                         ppi->iface_entry->ifname,
2631                         get_atag_from_tag(tag),
2632                         stat_uid,
2633                         cnt_set,
2634                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
2635                         dc_sum_packets(cnts, cnt_set, IFS_RX),
2636                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
2637                         dc_sum_packets(cnts, cnt_set, IFS_TX),
2638                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2639                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2640                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2641                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2642                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2643                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2644                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2645                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2646                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2647                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2648                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2649                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2650         }
2651         return len;
2652 }
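
/*
 * Example of one emitted stats line (hypothetical numbers), in the column
 * order of the header printed above:
 *
 *   2 wlan0 0x0 10005 0 1000 10 2000 20 600 6 300 3 100 1 1200 12 700 7 100 1
 *
 * i.e. rx_bytes (1000) == rx_tcp + rx_udp + rx_other bytes (600+300+100),
 * and similarly for the other aggregate columns.
 */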
2653
2654 static bool pp_sets(struct proc_print_info *ppi)
2655 {
2656         int len;
2657         int counter_set;
2658         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2659              counter_set++) {
2660                 len = pp_stats_line(ppi, counter_set);
2661                 if (len >= ppi->char_count) {
2662                         *ppi->outp = '\0';
2663                         return false;
2664                 }
2665                 if (len) {
2666                         ppi->outp += len;
2667                         ppi->char_count -= len;
2668                         (*ppi->num_items_returned)++;
2669                 }
2670         }
2671         return true;
2672 }
2673
2674 /*
2675  * Procfs reader to get all tag stats using style "1)" as described in
2676  * fs/proc/generic.c
2677  * Groups all protocols tx/rx bytes.
2678  */
2679 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2680                                 off_t items_to_skip, int char_count, int *eof,
2681                                 void *data)
2682 {
2683         struct proc_print_info ppi;
2684         int len;
2685
2686         ppi.outp = page;
2687         ppi.item_index = 0;
2688         ppi.char_count = char_count;
2689         ppi.num_items_returned = num_items_returned;
2690         ppi.items_to_skip = items_to_skip;
2691
2692         if (unlikely(module_passive)) {
2693                 len = pp_stats_line(&ppi, 0);
2694                 /* The header should always be shorter than the buffer. */
2695                 BUG_ON(len >= ppi.char_count);
2696                 (*num_items_returned)++;
2697                 *eof = 1;
2698                 return len;
2699         }
2700
2701         CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
2702                  "page=%p *num_items_returned=%p off=%ld "
2703                  "char_count=%d *eof=%d\n",
2704                  current->pid, current->tgid, current_fsuid(),
2705                  page, *num_items_returned,
2706                  items_to_skip, char_count, *eof);
2707
2708         if (*eof)
2709                 return 0;
2710
2711         /* The idx is there to help debug when things go belly up. */
2712         len = pp_stats_line(&ppi, 0);
2713         /* Don't advance the outp unless the whole line was printed */
2714         if (len >= ppi.char_count) {
2715                 *ppi.outp = '\0';
2716                 return ppi.outp - page;
2717         }
2718         if (len) {
2719                 ppi.outp += len;
2720                 ppi.char_count -= len;
2721                 (*num_items_returned)++;
2722         }
2723
2724         spin_lock_bh(&iface_stat_list_lock);
2725         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2726                 struct rb_node *node;
2727                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2728                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2729                      node;
2730                      node = rb_next(node)) {
2731                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2732                         if (!pp_sets(&ppi)) {
2733                                 spin_unlock_bh(
2734                                         &ppi.iface_entry->tag_stat_list_lock);
2735                                 spin_unlock_bh(&iface_stat_list_lock);
2736                                 return ppi.outp - page;
2737                         }
2738                 }
2739                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2740         }
2741         spin_unlock_bh(&iface_stat_list_lock);
2742
2743         *eof = 1;
2744         return ppi.outp - page;
2745 }
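/*
 * Usage sketch (assuming the default procdirname, so the directory ends up
 * under /proc/net): once the module is active the per-tag stats can simply be
 * read with
 *
 *   cat /proc/net/xt_qtaguid/stats
 *
 * Each read walks every iface_stat entry and every tag_stat node below it, so
 * the output is one line per (iface, tag, counter set) combination.
 */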
2746
2747 /*------------------------------------------*/
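/*
 * Opened once per process (keyed by tgid): the open creates a proc_qtu_data
 * entry tied to the caller's uid_tag_data so that socket tags created by this
 * process can be cleaned up in qtudev_release() when the fd goes away (e.g.
 * because the process died). A second open by the same tgid fails with -EBUSY.
 */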
2748 static int qtudev_open(struct inode *inode, struct file *file)
2749 {
2750         struct uid_tag_data *utd_entry;
2751         struct proc_qtu_data  *pqd_entry;
2752         struct proc_qtu_data  *new_pqd_entry;
2753         int res;
2754         bool utd_entry_found;
2755
2756         if (unlikely(qtu_proc_handling_passive))
2757                 return 0;
2758
2759         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2760                  current->pid, current->tgid, current_fsuid());
2761
2762         spin_lock_bh(&uid_tag_data_tree_lock);
2763
2764         /* Look for existing uid data, or alloc one. */
2765         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2766         if (IS_ERR_OR_NULL(utd_entry)) {
2767                 res = PTR_ERR(utd_entry);
2768                 goto err_unlock;
2769         }
2770
2771         /* Look for existing PID based proc_data */
2772         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2773                                               current->tgid);
2774         if (pqd_entry) {
2775                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2776                        "%s already opened\n",
2777                        current->pid, current->tgid, current_fsuid(),
2778                        QTU_DEV_NAME);
2779                 res = -EBUSY;
2780                 goto err_unlock_free_utd;
2781         }
2782
2783         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2784         if (!new_pqd_entry) {
2785                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2786                        "proc data alloc failed\n",
2787                        current->pid, current->tgid, current_fsuid());
2788                 res = -ENOMEM;
2789                 goto err_unlock_free_utd;
2790         }
2791         new_pqd_entry->pid = current->tgid;
2792         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2793         new_pqd_entry->parent_tag_data = utd_entry;
2794         utd_entry->num_pqd++;
2795
2796         proc_qtu_data_tree_insert(new_pqd_entry,
2797                                   &proc_qtu_data_tree);
2798
2799         spin_unlock_bh(&uid_tag_data_tree_lock);
2800         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2801                  current_fsuid(), new_pqd_entry);
2802         file->private_data = new_pqd_entry;
2803         return 0;
2804
2805 err_unlock_free_utd:
2806         if (!utd_entry_found) {
2807                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2808                 kfree(utd_entry);
2809         }
2810 err_unlock:
2811         spin_unlock_bh(&uid_tag_data_tree_lock);
2812         return res;
2813 }
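/*
 * Userspace sketch (illustrative; QTU_DEV_NAME is assumed to expand to
 * "xt_qtaguid", so the misc device registered below appears as
 * /dev/xt_qtaguid):
 *
 *   int qtu_fd = open("/dev/xt_qtaguid", O_RDONLY);
 *
 * The fd is kept open for the lifetime of the process; closing it (or exiting)
 * ends up in qtudev_release(), which untags any sockets still tagged through
 * this process.
 */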
2814
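/*
 * Called when the per-process fd is released. Walks the sock_tags created
 * through this proc_qtu_data, drops their tag_ref counts, moves them to a
 * temporary tree so sockfd_put() can happen outside the spinlocks, and finally
 * releases the proc_qtu_data and its reference on the parent uid_tag_data.
 */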
2815 static int qtudev_release(struct inode *inode, struct file *file)
2816 {
2817         struct proc_qtu_data  *pqd_entry = file->private_data;
2818         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2819         struct sock_tag *st_entry;
2820         struct rb_root st_to_free_tree = RB_ROOT;
2821         struct list_head *entry, *next;
2822         struct tag_ref *tr;
2823
2824         if (unlikely(qtu_proc_handling_passive))
2825                 return 0;
2826
2827         /*
2828          * Do not trust current->pid here; it might just be a kworker cleaning
2829          * up after a dead process.
2830          */
2831         DR_DEBUG("qtaguid: qtudev_release(): "
2832                  "pid=%u tgid=%u uid=%u "
2833                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2834                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2835                  pqd_entry, pqd_entry->pid, utd_entry,
2836                  utd_entry->num_active_tags);
2837
2838         spin_lock_bh(&sock_tag_list_lock);
2839         spin_lock_bh(&uid_tag_data_tree_lock);
2840
2841         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2842                 st_entry = list_entry(entry, struct sock_tag, list);
2843                 DR_DEBUG("qtaguid: %s(): "
2844                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2845                          __func__,
2846                          st_entry, st_entry->sk,
2847                          current->pid, current->tgid,
2848                          pqd_entry->parent_tag_data->uid);
2849
2850                 utd_entry = uid_tag_data_tree_search(
2851                         &uid_tag_data_tree,
2852                         get_uid_from_tag(st_entry->tag));
2853                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2854                 DR_DEBUG("qtaguid: %s(): "
2855                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2856                          st_entry->tag, utd_entry);
2857                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2858                                          st_entry->tag);
2859                 BUG_ON(!tr);
2860                 BUG_ON(tr->num_sock_tags <= 0);
2861                 tr->num_sock_tags--;
2862                 free_tag_ref_from_utd_entry(tr, utd_entry);
2863
2864                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2865                 list_del(&st_entry->list);
2866                 /* Can't sockfd_put() within spinlock, do it later. */
2867                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2868
2869                 /*
2870                  * Try to free the utd_entry if no other proc_qtu_data is
2871                  * using it (num_pqd is 0) and it doesn't have active tags
2872                  * (num_active_tags is 0).
2873                  */
2874                 put_utd_entry(utd_entry);
2875         }
2876
2877         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2878         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2879         pqd_entry->parent_tag_data->num_pqd--;
2880         put_utd_entry(pqd_entry->parent_tag_data);
2881         kfree(pqd_entry);
2882         file->private_data = NULL;
2883
2884         spin_unlock_bh(&uid_tag_data_tree_lock);
2885         spin_unlock_bh(&sock_tag_list_lock);
2886
2887
2888         sock_tag_tree_erase(&st_to_free_tree);
2889
2890         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2891                            current->pid, current->tgid);
2892         return 0;
2893 }
2894
2895 /*------------------------------------------*/
2896 static const struct file_operations qtudev_fops = {
2897         .owner = THIS_MODULE,
2898         .open = qtudev_open,
2899         .release = qtudev_release,
2900 };
2901
2902 static struct miscdevice qtu_device = {
2903         .minor = MISC_DYNAMIC_MINOR,
2904         .name = QTU_DEV_NAME,
2905         .fops = &qtudev_fops,
2906         /* Sadly no default mode can be set here: .mode = S_IRUGO | S_IWUSR */
2907 };
2908
2909 /*------------------------------------------*/
2910 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2911 {
2912         int ret;
2913         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2914         if (!*res_procdir) {
2915                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2916                 ret = -ENOMEM;
2917                 goto no_dir;
2918         }
2919
2920         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2921                                                 *res_procdir);
2922         if (!xt_qtaguid_ctrl_file) {
2923                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2924                         "file\n");
2925                 ret = -ENOMEM;
2926                 goto no_ctrl_entry;
2927         }
2928         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2929         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2930
2931         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2932                                                 *res_procdir);
2933         if (!xt_qtaguid_stats_file) {
2934                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2935                         "file\n");
2936                 ret = -ENOMEM;
2937                 goto no_stats_entry;
2938         }
2939         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2940         /*
2941          * TODO: add support for counter hacking:
2942          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2943          */
2944         return 0;
2945
2946 no_stats_entry:
2947         remove_proc_entry("ctrl", *res_procdir);
2948 no_ctrl_entry:
2949         remove_proc_entry(module_procdirname, init_net.proc_net);
2950 no_dir:
2951         return ret;
2952 }
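/*
 * Resulting procfs layout (sketch): an xt_qtaguid/ directory under /proc/net
 * holding "ctrl" (perms from proc_ctrl_perms, read via qtaguid_ctrl_proc_read,
 * written via qtaguid_ctrl_proc_write) and "stats" (perms from
 * proc_stats_perms, read via qtaguid_stats_proc_read).
 */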
2953
2954 static struct xt_match qtaguid_mt_reg __read_mostly = {
2955         /*
2956          * This module masquerades as the "owner" module so that iptables
2957          * tools can deal with it.
2958          */
2959         .name       = "owner",
2960         .revision   = 1,
2961         .family     = NFPROTO_UNSPEC,
2962         .match      = qtaguid_mt,
2963         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2964         .me         = THIS_MODULE,
2965 };
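/*
 * Illustrative rule (assumes the stock iptables "owner" userspace extension):
 *
 *   iptables -A OUTPUT -m owner --uid-owner 10005 -j REJECT
 *
 * Because this match registers as revision 1 of "owner" for any family, the
 * kernel side of such a rule is qtaguid_mt() from this module.
 */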
2966
2967 static int __init qtaguid_mt_init(void)
2968 {
2969         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2970             || iface_stat_init(xt_qtaguid_procdir)
2971             || xt_register_match(&qtaguid_mt_reg)
2972             || misc_register(&qtu_device))
2973                 return -1;
2974         return 0;
2975 }
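/*
 * Note: on any registration failure the init simply returns -1 and does not
 * undo the steps that already succeeded; together with the TODO below this
 * assumes the module is built in and never unloaded.
 */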
2976
2977 /*
2978  * TODO: allow unloading of the module.
2979  * For now stats are permanent.
2980  * Kconfig forces 'y/n' and never an 'm'.
2981  */
2982
2983 module_init(qtaguid_mt_init);
2984 MODULE_AUTHOR("jpa <jpa@google.com>");
2985 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2986 MODULE_LICENSE("GPL");
2987 MODULE_ALIAS("ipt_owner");
2988 MODULE_ALIAS("ip6t_owner");
2989 MODULE_ALIAS("ipt_qtaguid");
2990 MODULE_ALIAS("ip6t_qtaguid");