netfilter: qtaguid: initialize a local var to keep compiler happy.
net/netfilter/xt_qtaguid.c (firefly-linux-kernel-4.4.55.git)
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * Run-time debug flags can be enabled via the debug_mask module param or
13  * via DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
25 #include <net/sock.h>
26 #include <net/tcp.h>
27 #include <net/udp.h>
28
29 #include <linux/netfilter/xt_socket.h>
30 #include "xt_qtaguid_internal.h"
31 #include "xt_qtaguid_print.h"
32
33 /*
34  * We only use the xt_socket funcs from the hooks listed below, where their
35  * behavior is well defined, to avoid unexpected return values.
36  */
37 #define XT_SOCKET_SUPPORTED_HOOKS \
38         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
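/*
 * A rough sketch (not reproduced from this file) of how this mask is meant
 * to be consulted before calling into the xt_socket helpers, with
 * par->hooknum being the current xtables hook:
 *
 *	if (!((1 << par->hooknum) & XT_SOCKET_SUPPORTED_HOOKS))
 *		skip the xt_socket based sock lookup;
 */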
39
40
41 static const char *module_procdirname = "xt_qtaguid";
42 static struct proc_dir_entry *xt_qtaguid_procdir;
43
44 static unsigned int proc_iface_perms = S_IRUGO;
45 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
46
47 static struct proc_dir_entry *xt_qtaguid_stats_file;
48 static unsigned int proc_stats_perms = S_IRUGO;
49 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
50
51 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
52 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
53 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
54 #else
55 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
56 #endif
57 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
58
59 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
60 #include <linux/android_aid.h>
61 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
62 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
63 #else
64 /* a gid of 0 means: don't limit anybody */
65 static gid_t proc_stats_readall_gid;
66 static gid_t proc_ctrl_write_gid;
67 #endif
68 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
69                    S_IRUGO | S_IWUSR);
70 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
71                    S_IRUGO | S_IWUSR);
72
73 /*
74  * Limit the number of active tags (via socket tags) for a given UID.
75  * Multiple processes could share the UID.
76  */
77 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
78 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
79
80 /*
81  * After the kernel has initialized this module, it is still possible
82  * to make it passive.
83  * Setting passive to Y:
84  *  - the iface stats handling will not act on notifications.
85  *  - iptables matches will never match.
86  *  - ctrl commands silently succeed.
87  *  - stats are always empty.
88  * This is mostly useful when a bug is suspected.
89  */
90 static bool module_passive;
91 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
92
93 /*
94  * Control how qtaguid data is tracked per proc/uid.
95  * Setting tag_tracking_passive to Y:
96  *  - don't create proc specific structs to track tags
97  *  - don't check that active tag stats exceed some limits.
98  *  - don't clean up socket tags on process exits.
99  * This is mostly useful when a bug is suspected.
100  */
101 static bool qtu_proc_handling_passive;
102 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
103                    S_IRUGO | S_IWUSR);
104
105 #define QTU_DEV_NAME "xt_qtaguid"
106
107 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
108 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
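/*
 * All of the writable params above use S_IWUSR, so root can adjust them at
 * runtime through the standard module parameter interface, e.g.:
 *
 *	echo 0x3f > /sys/module/xt_qtaguid/parameters/debug_mask
 *	echo 1    > /sys/module/xt_qtaguid/parameters/passive
 *
 * (paths assume the usual /sys/module/<name>/parameters layout)
 */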
109
110 /*---------------------------------------------------------------------------*/
111 static const char *iface_stat_procdirname = "iface_stat";
112 static struct proc_dir_entry *iface_stat_procdir;
113 static const char *iface_stat_all_procfilename = "iface_stat_all";
114 static struct proc_dir_entry *iface_stat_all_procfile;
115
116 /*
117  * Ordering of locks:
118  *  outer locks:
119  *    iface_stat_list_lock
120  *    sock_tag_list_lock
121  *  inner locks:
122  *    uid_tag_data_tree_lock
123  *    tag_counter_set_list_lock
124  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
125  * is acquired.
126  *
127  * Call tree with all lock holders as of 2011-09-25:
128  *
129  * iface_stat_all_proc_read()
130  *   iface_stat_list_lock
131  *     (struct iface_stat)
132  *
133  * qtaguid_ctrl_proc_read()
134  *   sock_tag_list_lock
135  *     (sock_tag_tree)
136  *     (struct proc_qtu_data->sock_tag_list)
137  *   prdebug_full_state()
138  *     sock_tag_list_lock
139  *       (sock_tag_tree)
140  *     uid_tag_data_tree_lock
141  *       (uid_tag_data_tree)
142  *       (proc_qtu_data_tree)
143  *     iface_stat_list_lock
144  *
145  * qtaguid_stats_proc_read()
146  *   iface_stat_list_lock
147  *     struct iface_stat->tag_stat_list_lock
148  *
149  * qtudev_open()
150  *   uid_tag_data_tree_lock
151  *
152  * qtudev_release()
153  *   sock_tag_data_list_lock
154  *     uid_tag_data_tree_lock
155  *   prdebug_full_state()
156  *     sock_tag_list_lock
157  *     uid_tag_data_tree_lock
158  *     iface_stat_list_lock
159  *
160  * iface_netdev_event_handler()
161  *   iface_stat_create()
162  *     iface_stat_list_lock
163  *   iface_stat_update()
164  *     iface_stat_list_lock
165  *
166  * iface_inetaddr_event_handler()
167  *   iface_stat_create()
168  *     iface_stat_list_lock
169  *   iface_stat_update()
170  *     iface_stat_list_lock
171  *
172  * iface_inet6addr_event_handler()
173  *   iface_stat_create_ipv6()
174  *     iface_stat_list_lock
175  *   iface_stat_update()
176  *     iface_stat_list_lock
177  *
178  * qtaguid_mt()
179  *   account_for_uid()
180  *     if_tag_stat_update()
181  *       get_sock_stat()
182  *         sock_tag_list_lock
183  *       struct iface_stat->tag_stat_list_lock
184  *         tag_stat_update()
185  *           get_active_counter_set()
186  *             tag_counter_set_list_lock
187  *         tag_stat_update()
188  *           get_active_counter_set()
189  *             tag_counter_set_list_lock
190  *
191  *
192  * qtaguid_ctrl_parse()
193  *   ctrl_cmd_delete()
194  *     sock_tag_list_lock
195  *     tag_counter_set_list_lock
196  *     iface_stat_list_lock
197  *       struct iface_stat->tag_stat_list_lock
198  *     uid_tag_data_tree_lock
199  *   ctrl_cmd_counter_set()
200  *     tag_counter_set_list_lock
201  *   ctrl_cmd_tag()
202  *     sock_tag_list_lock
203  *       (sock_tag_tree)
204  *       get_tag_ref()
205  *         uid_tag_data_tree_lock
206  *           (uid_tag_data_tree)
207  *       uid_tag_data_tree_lock
208  *         (proc_qtu_data_tree)
209  *   ctrl_cmd_untag()
210  *     sock_tag_list_lock
211  *     uid_tag_data_tree_lock
212  *
213  */
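/*
 * In code, the ordering above means the outer lock wraps the inner one, e.g.
 * (a sketch following the ctrl_cmd_tag() path listed above):
 *
 *	spin_lock_bh(&sock_tag_list_lock);
 *	spin_lock_bh(&uid_tag_data_tree_lock);
 *	... touch uid_tag_data_tree / proc_qtu_data_tree ...
 *	spin_unlock_bh(&uid_tag_data_tree_lock);
 *	spin_unlock_bh(&sock_tag_list_lock);
 */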
214 static LIST_HEAD(iface_stat_list);
215 static DEFINE_SPINLOCK(iface_stat_list_lock);
216
217 static struct rb_root sock_tag_tree = RB_ROOT;
218 static DEFINE_SPINLOCK(sock_tag_list_lock);
219
220 static struct rb_root tag_counter_set_tree = RB_ROOT;
221 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
222
223 static struct rb_root uid_tag_data_tree = RB_ROOT;
224 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
225
226 static struct rb_root proc_qtu_data_tree = RB_ROOT;
227 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
228
229 static struct qtaguid_event_counts qtu_events;
230 /*----------------------------------------------*/
231 static bool can_manipulate_uids(void)
232 {
233         /* root pwnd */
234         return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
235                 || in_egroup_p(proc_ctrl_write_gid);
236 }
237
238 static bool can_impersonate_uid(uid_t uid)
239 {
240         return uid == current_fsuid() || can_manipulate_uids();
241 }
242
243 static bool can_read_other_uid_stats(uid_t uid)
244 {
245         /* root pwnd */
246         return unlikely(!current_fsuid()) || uid == current_fsuid()
247                 || unlikely(!proc_stats_readall_gid)
248                 || in_egroup_p(proc_stats_readall_gid);
249 }
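/*
 * Net effect of the checks above: root (fsuid 0) always passes; a configured
 * gid of 0 disables the group restriction entirely; otherwise the caller must
 * belong to the matching group, and for stats it may always read its own uid.
 */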
250
251 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
252                                   enum ifs_tx_rx direction,
253                                   enum ifs_proto ifs_proto,
254                                   int bytes,
255                                   int packets)
256 {
257         counters->bpc[set][direction][ifs_proto].bytes += bytes;
258         counters->bpc[set][direction][ifs_proto].packets += packets;
259 }
260
261 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
262                                     int set,
263                                     enum ifs_tx_rx direction)
264 {
265         return counters->bpc[set][direction][IFS_TCP].bytes
266                 + counters->bpc[set][direction][IFS_UDP].bytes
267                 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
268 }
269
270 static inline uint64_t dc_sum_packets(struct data_counters *counters,
271                                       int set,
272                                       enum ifs_tx_rx direction)
273 {
274         return counters->bpc[set][direction][IFS_TCP].packets
275                 + counters->bpc[set][direction][IFS_UDP].packets
276                 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
277 }
278
279 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
280 {
281         struct rb_node *node = root->rb_node;
282
283         while (node) {
284                 struct tag_node *data = rb_entry(node, struct tag_node, node);
285                 int result;
286                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
287                          " node=%p data=%p\n", tag, node, data);
288                 result = tag_compare(tag, data->tag);
289                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
290                          " data.tag=0x%llx (uid=%u) res=%d\n",
291                          tag, data->tag, get_uid_from_tag(data->tag), result);
292                 if (result < 0)
293                         node = node->rb_left;
294                 else if (result > 0)
295                         node = node->rb_right;
296                 else
297                         return data;
298         }
299         return NULL;
300 }
301
302 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
303 {
304         struct rb_node **new = &(root->rb_node), *parent = NULL;
305
306         /* Figure out where to put new node */
307         while (*new) {
308                 struct tag_node *this = rb_entry(*new, struct tag_node,
309                                                  node);
310                 int result = tag_compare(data->tag, this->tag);
311                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
312                          " (uid=%u)\n", __func__,
313                          this->tag,
314                          get_uid_from_tag(this->tag));
315                 parent = *new;
316                 if (result < 0)
317                         new = &((*new)->rb_left);
318                 else if (result > 0)
319                         new = &((*new)->rb_right);
320                 else
321                         BUG();
322         }
323
324         /* Add new node and rebalance tree. */
325         rb_link_node(&data->node, parent, new);
326         rb_insert_color(&data->node, root);
327 }
328
329 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
330 {
331         tag_node_tree_insert(&data->tn, root);
332 }
333
334 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
335 {
336         struct tag_node *node = tag_node_tree_search(root, tag);
337         if (!node)
338                 return NULL;
339         return rb_entry(&node->node, struct tag_stat, tn.node);
340 }
341
342 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
343                                         struct rb_root *root)
344 {
345         tag_node_tree_insert(&data->tn, root);
346 }
347
348 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
349                                                            tag_t tag)
350 {
351         struct tag_node *node = tag_node_tree_search(root, tag);
352         if (!node)
353                 return NULL;
354         return rb_entry(&node->node, struct tag_counter_set, tn.node);
355
356 }
357
358 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
359 {
360         tag_node_tree_insert(&data->tn, root);
361 }
362
363 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
364 {
365         struct tag_node *node = tag_node_tree_search(root, tag);
366         if (!node)
367                 return NULL;
368         return rb_entry(&node->node, struct tag_ref, tn.node);
369 }
370
371 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
372                                              const struct sock *sk)
373 {
374         struct rb_node *node = root->rb_node;
375
376         while (node) {
377                 struct sock_tag *data = rb_entry(node, struct sock_tag,
378                                                  sock_node);
379                 if (sk < data->sk)
380                         node = node->rb_left;
381                 else if (sk > data->sk)
382                         node = node->rb_right;
383                 else
384                         return data;
385         }
386         return NULL;
387 }
388
389 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
390 {
391         struct rb_node **new = &(root->rb_node), *parent = NULL;
392
393         /* Figure out where to put new node */
394         while (*new) {
395                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
396                                                  sock_node);
397                 parent = *new;
398                 if (data->sk < this->sk)
399                         new = &((*new)->rb_left);
400                 else if (data->sk > this->sk)
401                         new = &((*new)->rb_right);
402                 else
403                         BUG();
404         }
405
406         /* Add new node and rebalance tree. */
407         rb_link_node(&data->sock_node, parent, new);
408         rb_insert_color(&data->sock_node, root);
409 }
410
411 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
412 {
413         struct rb_node *node;
414         struct sock_tag *st_entry;
415
416         node = rb_first(st_to_free_tree);
417         while (node) {
418                 st_entry = rb_entry(node, struct sock_tag, sock_node);
419                 node = rb_next(node);
420                 CT_DEBUG("qtaguid: %s(): "
421                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
422                          st_entry->sk,
423                          st_entry->tag,
424                          get_uid_from_tag(st_entry->tag));
425                 rb_erase(&st_entry->sock_node, st_to_free_tree);
426                 sockfd_put(st_entry->socket);
427                 kfree(st_entry);
428         }
429 }
430
431 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
432                                                        const pid_t pid)
433 {
434         struct rb_node *node = root->rb_node;
435
436         while (node) {
437                 struct proc_qtu_data *data = rb_entry(node,
438                                                       struct proc_qtu_data,
439                                                       node);
440                 if (pid < data->pid)
441                         node = node->rb_left;
442                 else if (pid > data->pid)
443                         node = node->rb_right;
444                 else
445                         return data;
446         }
447         return NULL;
448 }
449
450 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
451                                       struct rb_root *root)
452 {
453         struct rb_node **new = &(root->rb_node), *parent = NULL;
454
455         /* Figure out where to put new node */
456         while (*new) {
457                 struct proc_qtu_data *this = rb_entry(*new,
458                                                       struct proc_qtu_data,
459                                                       node);
460                 parent = *new;
461                 if (data->pid < this->pid)
462                         new = &((*new)->rb_left);
463                 else if (data->pid > this->pid)
464                         new = &((*new)->rb_right);
465                 else
466                         BUG();
467         }
468
469         /* Add new node and rebalance tree. */
470         rb_link_node(&data->node, parent, new);
471         rb_insert_color(&data->node, root);
472 }
473
474 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
475                                      struct rb_root *root)
476 {
477         struct rb_node **new = &(root->rb_node), *parent = NULL;
478
479         /* Figure out where to put new node */
480         while (*new) {
481                 struct uid_tag_data *this = rb_entry(*new,
482                                                      struct uid_tag_data,
483                                                      node);
484                 parent = *new;
485                 if (data->uid < this->uid)
486                         new = &((*new)->rb_left);
487                 else if (data->uid > this->uid)
488                         new = &((*new)->rb_right);
489                 else
490                         BUG();
491         }
492
493         /* Add new node and rebalance tree. */
494         rb_link_node(&data->node, parent, new);
495         rb_insert_color(&data->node, root);
496 }
497
498 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
499                                                      uid_t uid)
500 {
501         struct rb_node *node = root->rb_node;
502
503         while (node) {
504                 struct uid_tag_data *data = rb_entry(node,
505                                                      struct uid_tag_data,
506                                                      node);
507                 if (uid < data->uid)
508                         node = node->rb_left;
509                 else if (uid > data->uid)
510                         node = node->rb_right;
511                 else
512                         return data;
513         }
514         return NULL;
515 }
516
517 /*
518  * Allocates a new uid_tag_data struct if needed.
519  * Returns a pointer to the found or allocated uid_tag_data.
520  * Returns a PTR_ERR on failure; no lock is taken or released here.
521  * If found_res is not NULL:
522  *   sets *found_res to true if the entry already existed,
523  *   sets *found_res to false if it had to be allocated.
524  */
525 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
526 {
527         struct uid_tag_data *utd_entry;
528
529         /* Look for top level uid_tag_data for the UID */
530         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
531         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
532
533         if (found_res)
534                 *found_res = utd_entry;
535         if (utd_entry)
536                 return utd_entry;
537
538         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
539         if (!utd_entry) {
540                 pr_err("qtaguid: get_uid_data(%u): "
541                        "tag data alloc failed\n", uid);
542                 return ERR_PTR(-ENOMEM);
543         }
544
545         utd_entry->uid = uid;
546         utd_entry->tag_ref_tree = RB_ROOT;
547         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
548         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
549         return utd_entry;
550 }
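/*
 * get_uid_data() itself takes no locks (hence the GFP_ATOMIC allocation);
 * callers such as get_tag_ref() below hold uid_tag_data_tree_lock around it
 * while searching or inserting into uid_tag_data_tree.
 */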
551
552 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
553 static struct tag_ref *new_tag_ref(tag_t new_tag,
554                                    struct uid_tag_data *utd_entry)
555 {
556         struct tag_ref *tr_entry;
557         int res;
558
559         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
560                 pr_info("qtaguid: new_tag_ref(0x%llx): "
561                         "tag ref alloc quota exceeded. max=%d\n",
562                         new_tag, max_sock_tags);
563                 res = -EMFILE;
564                 goto err_res;
565
566         }
567
568         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
569         if (!tr_entry) {
570                 pr_err("qtaguid: new_tag_ref(0x%llx): "
571                        "tag ref alloc failed\n",
572                        new_tag);
573                 res = -ENOMEM;
574                 goto err_res;
575         }
576         tr_entry->tn.tag = new_tag;
577         /* tr_entry->num_sock_tags  handled by caller */
578         utd_entry->num_active_tags++;
579         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
580         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
581                  " inserted new tag ref %p\n",
582                  new_tag, tr_entry);
583         return tr_entry;
584
585 err_res:
586         return ERR_PTR(res);
587 }
588
589 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
590                                       struct uid_tag_data **utd_res)
591 {
592         struct uid_tag_data *utd_entry;
593         struct tag_ref *tr_entry;
594         bool found_utd;
595         uid_t uid = get_uid_from_tag(full_tag);
596
597         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
598                  full_tag, uid);
599
600         utd_entry = get_uid_data(uid, &found_utd);
601         if (IS_ERR_OR_NULL(utd_entry)) {
602                 if (utd_res)
603                         *utd_res = utd_entry;
604                 return NULL;
605         }
606
607         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
608         if (utd_res)
609                 *utd_res = utd_entry;
610         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
611                  full_tag, utd_entry, tr_entry);
612         return tr_entry;
613 }
614
615 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
616 static struct tag_ref *get_tag_ref(tag_t full_tag,
617                                    struct uid_tag_data **utd_res)
618 {
619         struct uid_tag_data *utd_entry;
620         struct tag_ref *tr_entry;
621
622         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
623                  full_tag);
624         spin_lock_bh(&uid_tag_data_tree_lock);
625         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
626         BUG_ON(IS_ERR_OR_NULL(utd_entry));
627         if (!tr_entry)
628                 tr_entry = new_tag_ref(full_tag, utd_entry);
629
630         spin_unlock_bh(&uid_tag_data_tree_lock);
631         if (utd_res)
632                 *utd_res = utd_entry;
633         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
634                  full_tag, utd_entry, tr_entry);
635         return tr_entry;
636 }
637
638 /* Checks and maybe frees the UID Tag Data entry */
639 static void put_utd_entry(struct uid_tag_data *utd_entry)
640 {
641         /* Are we done with the UID tag data entry? */
642         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
643                 !utd_entry->num_pqd) {
644                 DR_DEBUG("qtaguid: %s(): "
645                          "erase utd_entry=%p uid=%u "
646                          "by pid=%u tgid=%u uid=%u\n", __func__,
647                          utd_entry, utd_entry->uid,
648                          current->pid, current->tgid, current_fsuid());
649                 BUG_ON(utd_entry->num_active_tags);
650                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
651                 kfree(utd_entry);
652         } else {
653                 DR_DEBUG("qtaguid: %s(): "
654                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
655                          __func__, utd_entry, utd_entry->num_active_tags,
656                          utd_entry->num_pqd);
657                 BUG_ON(!(utd_entry->num_active_tags ||
658                          utd_entry->num_pqd));
659         }
660 }
661
662 /*
663  * If no sock_tags are using this tag_ref,
664  * decrements the utd_entry's active tag count, removes tr_entry
665  * from utd_entry->tag_ref_tree and frees it.
666  */
667 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
668                                         struct uid_tag_data *utd_entry)
669 {
670         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
671                  tr_entry, tr_entry->tn.tag,
672                  get_uid_from_tag(tr_entry->tn.tag));
673         if (!tr_entry->num_sock_tags) {
674                 BUG_ON(!utd_entry->num_active_tags);
675                 utd_entry->num_active_tags--;
676                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
677                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
678                 kfree(tr_entry);
679         }
680 }
681
682 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
683 {
684         struct rb_node *node;
685         struct tag_ref *tr_entry;
686         tag_t acct_tag;
687
688         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
689                  full_tag, get_uid_from_tag(full_tag));
690         acct_tag = get_atag_from_tag(full_tag);
691         node = rb_first(&utd_entry->tag_ref_tree);
692         while (node) {
693                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
694                 node = rb_next(node);
695                 if (!acct_tag || tr_entry->tn.tag == full_tag)
696                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
697         }
698 }
699
700 static int read_proc_u64(char *page, char **start, off_t off,
701                         int count, int *eof, void *data)
702 {
703         int len;
704         uint64_t value;
705         char *p = page;
706         uint64_t *iface_entry = data;
707
708         if (!data)
709                 return 0;
710
711         value = *iface_entry;
712         p += sprintf(p, "%llu\n", value);
713         len = (p - page) - off;
714         *eof = (len <= count) ? 1 : 0;
715         *start = page + off;
716         return len;
717 }
718
719 static int read_proc_bool(char *page, char **start, off_t off,
720                         int count, int *eof, void *data)
721 {
722         int len;
723         bool value;
724         char *p = page;
725         bool *bool_entry = data;
726
727         if (!data)
728                 return 0;
729
730         value = *bool_entry;
731         p += sprintf(p, "%u\n", value);
732         len = (p - page) - off;
733         *eof = (len <= count) ? 1 : 0;
734         *start = page + off;
735         return len;
736 }
737
738 static int get_active_counter_set(tag_t tag)
739 {
740         int active_set = 0;
741         struct tag_counter_set *tcs;
742
743         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
744                  " (uid=%u)\n",
745                  tag, get_uid_from_tag(tag));
746         /* For now we only handle UID tags for active sets */
747         tag = get_utag_from_tag(tag);
748         spin_lock_bh(&tag_counter_set_list_lock);
749         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
750         if (tcs)
751                 active_set = tcs->active_set;
752         spin_unlock_bh(&tag_counter_set_list_lock);
753         return active_set;
754 }
755
756 /*
757  * Find the entry for tracking the specified interface.
758  * Caller must hold iface_stat_list_lock
759  */
760 static struct iface_stat *get_iface_entry(const char *ifname)
761 {
762         struct iface_stat *iface_entry;
763
764         /* Sanity-check the interface name */
765         if (ifname == NULL) {
766                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
767                 return NULL;
768         }
769
770         /* Iterate over interfaces */
771         list_for_each_entry(iface_entry, &iface_stat_list, list) {
772                 if (!strcmp(ifname, iface_entry->ifname))
773                         goto done;
774         }
775         iface_entry = NULL;
776 done:
777         return iface_entry;
778 }
779
780 static int iface_stat_all_proc_read(char *page, char **num_items_returned,
781                                     off_t items_to_skip, int char_count,
782                                     int *eof, void *data)
783 {
784         char *outp = page;
785         int item_index = 0;
786         int len;
787         struct iface_stat *iface_entry;
788         struct rtnl_link_stats64 dev_stats, *stats;
789         struct rtnl_link_stats64 no_dev_stats = {0};
790
791         if (unlikely(module_passive)) {
792                 *eof = 1;
793                 return 0;
794         }
795
796         CT_DEBUG("qtaguid:proc iface_stat_all "
797                  "page=%p *num_items_returned=%p off=%ld "
798                  "char_count=%d *eof=%d\n", page, *num_items_returned,
799                  items_to_skip, char_count, *eof);
800
801         if (*eof)
802                 return 0;
803
804         /*
805          * This lock will prevent iface_stat_update() from changing active,
806          * and in turn prevent an interface from unregistering itself.
807          */
808         spin_lock_bh(&iface_stat_list_lock);
809         list_for_each_entry(iface_entry, &iface_stat_list, list) {
810                 if (item_index++ < items_to_skip)
811                         continue;
812
813                 if (iface_entry->active) {
814                         stats = dev_get_stats(iface_entry->net_dev,
815                                               &dev_stats);
816                 } else {
817                         stats = &no_dev_stats;
818                 }
819                 len = snprintf(outp, char_count,
820                                "%s %d "
821                                "%llu %llu %llu %llu "
822                                "%llu %llu %llu %llu\n",
823                                iface_entry->ifname,
824                                iface_entry->active,
825                                iface_entry->totals[IFS_RX].bytes,
826                                iface_entry->totals[IFS_RX].packets,
827                                iface_entry->totals[IFS_TX].bytes,
828                                iface_entry->totals[IFS_TX].packets,
829                                stats->rx_bytes, stats->rx_packets,
830                                stats->tx_bytes, stats->tx_packets);
831                 if (len >= char_count) {
832                         spin_unlock_bh(&iface_stat_list_lock);
833                         *outp = '\0';
834                         return outp - page;
835                 }
836                 outp += len;
837                 char_count -= len;
838                 (*num_items_returned)++;
839         }
840         spin_unlock_bh(&iface_stat_list_lock);
841
842         *eof = 1;
843         return outp - page;
844 }
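/*
 * Each line emitted above therefore reads:
 *   <ifname> <active> <rx_bytes> <rx_packets> <tx_bytes> <tx_packets>
 *            <dev_rx_bytes> <dev_rx_packets> <dev_tx_bytes> <dev_tx_packets>
 * where the first four counters are this module's accumulated totals and the
 * last four come from dev_get_stats() (all zero while the iface is inactive).
 */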
845
846 static void iface_create_proc_worker(struct work_struct *work)
847 {
848         struct proc_dir_entry *proc_entry;
849         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
850                                                    iface_work);
851         struct iface_stat *new_iface  = isw->iface_entry;
852
853         /* iface_entries are not deleted, so safe to manipulate. */
854         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
855         if (IS_ERR_OR_NULL(proc_entry)) {
856                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
857                 kfree(isw);
858                 return;
859         }
860
861         new_iface->proc_ptr = proc_entry;
862
863         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
864                         read_proc_u64, &new_iface->totals[IFS_TX].bytes);
865         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
866                         read_proc_u64, &new_iface->totals[IFS_RX].bytes);
867         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
868                         read_proc_u64, &new_iface->totals[IFS_TX].packets);
869         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
870                         read_proc_u64, &new_iface->totals[IFS_RX].packets);
871         create_proc_read_entry("active", proc_iface_perms, proc_entry,
872                         read_proc_bool, &new_iface->active);
873
874         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
875                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
876         kfree(isw);
877 }
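/*
 * With the default procdir names used in this file, the per-interface files
 * created above (tx_bytes, rx_bytes, tx_packets, rx_packets, active) are
 * expected to show up as
 *   /proc/net/xt_qtaguid/iface_stat/<ifname>/<file>
 * assuming the module procdir is registered under /proc/net, as on typical
 * Android kernels.
 */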
878
879 /*
880  * Set the entry's active state and update
881  * its net_dev pointer accordingly.
882  */
883 static void _iface_stat_set_active(struct iface_stat *entry,
884                                    struct net_device *net_dev,
885                                    bool activate)
886 {
887         if (activate) {
888                 entry->net_dev = net_dev;
889                 entry->active = true;
890                 IF_DEBUG("qtaguid: %s(%s): "
891                          "enable tracking. rfcnt=%d\n", __func__,
892                          entry->ifname,
893                          __this_cpu_read(*net_dev->pcpu_refcnt));
894         } else {
895                 entry->active = false;
896                 entry->net_dev = NULL;
897                 IF_DEBUG("qtaguid: %s(%s): "
898                          "disable tracking. rfcnt=%d\n", __func__,
899                          entry->ifname,
900                          __this_cpu_read(*net_dev->pcpu_refcnt));
901
902         }
903 }
904
905 /* Caller must hold iface_stat_list_lock */
906 static struct iface_stat *iface_alloc(struct net_device *net_dev)
907 {
908         struct iface_stat *new_iface;
909         struct iface_stat_work *isw;
910
911         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
912         if (new_iface == NULL) {
913                 pr_err("qtaguid: iface_stat: create(%s): "
914                        "iface_stat alloc failed\n", net_dev->name);
915                 return NULL;
916         }
917         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
918         if (new_iface->ifname == NULL) {
919                 pr_err("qtaguid: iface_stat: create(%s): "
920                        "ifname alloc failed\n", net_dev->name);
921                 kfree(new_iface);
922                 return NULL;
923         }
924         spin_lock_init(&new_iface->tag_stat_list_lock);
925         new_iface->tag_stat_tree = RB_ROOT;
926         _iface_stat_set_active(new_iface, net_dev, true);
927
928         /*
929          * ipv6 notifier chains are atomic :( so we cannot call
930          * create_proc_read_entry() here; defer it to the workqueue below.
931          */
932         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
933         if (!isw) {
934                 pr_err("qtaguid: iface_stat: create(%s): "
935                        "work alloc failed\n", new_iface->ifname);
936                 _iface_stat_set_active(new_iface, net_dev, false);
937                 kfree(new_iface->ifname);
938                 kfree(new_iface);
939                 return NULL;
940         }
941         isw->iface_entry = new_iface;
942         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
943         schedule_work(&isw->iface_work);
944         list_add(&new_iface->list, &iface_stat_list);
945         return new_iface;
946 }
947
948 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
949                                                struct iface_stat *iface)
950 {
951         struct rtnl_link_stats64 dev_stats, *stats;
952         bool stats_rewound;
953
954         stats = dev_get_stats(net_dev, &dev_stats);
955         /* No empty packets */
956         stats_rewound =
957                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
958                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
959
960         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
961                  "bytes rx/tx=%llu/%llu "
962                  "active=%d last_known=%d "
963                  "stats_rewound=%d\n", __func__,
964                  net_dev ? net_dev->name : "?",
965                  iface, net_dev,
966                  stats->rx_bytes, stats->tx_bytes,
967                  iface->active, iface->last_known_valid, stats_rewound);
968
969         if (iface->active && iface->last_known_valid && stats_rewound) {
970                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
971                              "iface reset its stats unexpectedly\n", __func__,
972                              net_dev->name);
973
974                 iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
975                 iface->totals[IFS_TX].packets +=
976                         iface->last_known[IFS_TX].packets;
977                 iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
978                 iface->totals[IFS_RX].packets +=
979                         iface->last_known[IFS_RX].packets;
980                 iface->last_known_valid = false;
981                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
982                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
983                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
984                          iface->last_known[IFS_TX].bytes);
985         }
986 }
987
988 /*
989  * Create a new entry for tracking the specified interface.
990  * Do nothing if the entry already exists.
991  * Called when an interface is configured with a valid IP address.
992  */
993 static void iface_stat_create(struct net_device *net_dev,
994                               struct in_ifaddr *ifa)
995 {
996         struct in_device *in_dev = NULL;
997         const char *ifname;
998         struct iface_stat *entry;
999         __be32 ipaddr = 0;
1000         struct iface_stat *new_iface;
1001
1002         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1003                  net_dev ? net_dev->name : "?",
1004                  ifa, net_dev);
1005         if (!net_dev) {
1006                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1007                 return;
1008         }
1009
1010         ifname = net_dev->name;
1011         if (!ifa) {
1012                 in_dev = in_dev_get(net_dev);
1013                 if (!in_dev) {
1014                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1015                                ifname);
1016                         return;
1017                 }
1018                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1019                          ifname, in_dev);
1020                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1021                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
1022                                  "ifa=%p ifa_label=%s\n",
1023                                  ifname, ifa,
1024                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
1025                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1026                                 break;
1027                 }
1028         }
1029
1030         if (!ifa) {
1031                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1032                          ifname);
1033                 goto done_put;
1034         }
1035         ipaddr = ifa->ifa_local;
1036
1037         spin_lock_bh(&iface_stat_list_lock);
1038         entry = get_iface_entry(ifname);
1039         if (entry != NULL) {
1040                 bool activate = !ipv4_is_loopback(ipaddr);
1041                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1042                          ifname, entry);
1043                 iface_check_stats_reset_and_adjust(net_dev, entry);
1044                 _iface_stat_set_active(entry, net_dev, activate);
1045                 IF_DEBUG("qtaguid: %s(%s): "
1046                          "tracking now %d on ip=%pI4\n", __func__,
1047                          entry->ifname, activate, &ipaddr);
1048                 goto done_unlock_put;
1049         } else if (ipv4_is_loopback(ipaddr)) {
1050                 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1051                          "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1052                 goto done_unlock_put;
1053         }
1054
1055         new_iface = iface_alloc(net_dev);
1056         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1057                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1058 done_unlock_put:
1059         spin_unlock_bh(&iface_stat_list_lock);
1060 done_put:
1061         if (in_dev)
1062                 in_dev_put(in_dev);
1063 }
1064
1065 static void iface_stat_create_ipv6(struct net_device *net_dev,
1066                                    struct inet6_ifaddr *ifa)
1067 {
1068         struct in_device *in_dev;
1069         const char *ifname;
1070         struct iface_stat *entry;
1071         struct iface_stat *new_iface;
1072         int addr_type;
1073
1074         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1075                  ifa, net_dev, net_dev ? net_dev->name : "");
1076         if (!net_dev) {
1077                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1078                 return;
1079         }
1080         ifname = net_dev->name;
1081
1082         in_dev = in_dev_get(net_dev);
1083         if (!in_dev) {
1084                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1085                        ifname);
1086                 return;
1087         }
1088
1089         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1090                  ifname, in_dev);
1091
1092         if (!ifa) {
1093                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1094                          ifname);
1095                 goto done_put;
1096         }
1097         addr_type = ipv6_addr_type(&ifa->addr);
1098
1099         spin_lock_bh(&iface_stat_list_lock);
1100         entry = get_iface_entry(ifname);
1101         if (entry != NULL) {
1102                 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1103                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1104                          ifname, entry);
1105                 iface_check_stats_reset_and_adjust(net_dev, entry);
1106                 _iface_stat_set_active(entry, net_dev, activate);
1107                 IF_DEBUG("qtaguid: %s(%s): "
1108                          "tracking now %d on ip=%pI6c\n", __func__,
1109                          entry->ifname, activate, &ifa->addr);
1110                 goto done_unlock_put;
1111         } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1112                 IF_DEBUG("qtaguid: %s(%s): "
1113                          "ignore loopback dev. ip=%pI6c\n", __func__,
1114                          ifname, &ifa->addr);
1115                 goto done_unlock_put;
1116         }
1117
1118         new_iface = iface_alloc(net_dev);
1119         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1120                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1121
1122 done_unlock_put:
1123         spin_unlock_bh(&iface_stat_list_lock);
1124 done_put:
1125         in_dev_put(in_dev);
1126 }
1127
1128 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1129 {
1130         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1131         return sock_tag_tree_search(&sock_tag_tree, sk);
1132 }
1133
1134 static struct sock_tag *get_sock_stat(const struct sock *sk)
1135 {
1136         struct sock_tag *sock_tag_entry;
1137         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1138         if (!sk)
1139                 return NULL;
1140         spin_lock_bh(&sock_tag_list_lock);
1141         sock_tag_entry = get_sock_stat_nl(sk);
1142         spin_unlock_bh(&sock_tag_list_lock);
1143         return sock_tag_entry;
1144 }
1145
1146 static void
1147 data_counters_update(struct data_counters *dc, int set,
1148                      enum ifs_tx_rx direction, int proto, int bytes)
1149 {
1150         switch (proto) {
1151         case IPPROTO_TCP:
1152                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1153                 break;
1154         case IPPROTO_UDP:
1155                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1156                 break;
1157         case IPPROTO_IP:
1158         default:
1159                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1160                                     1);
1161                 break;
1162         }
1163 }
1164
1165 /*
1166  * Update stats for the specified interface. Do nothing if the entry
1167  * does not exist (when a device was never configured with an IP address).
1168  * Called when a device is being unregistered.
1169  */
1170 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1171 {
1172         struct rtnl_link_stats64 dev_stats, *stats;
1173         struct iface_stat *entry;
1174
1175         stats = dev_get_stats(net_dev, &dev_stats);
1176         spin_lock_bh(&iface_stat_list_lock);
1177         entry = get_iface_entry(net_dev->name);
1178         if (entry == NULL) {
1179                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1180                          net_dev->name);
1181                 spin_unlock_bh(&iface_stat_list_lock);
1182                 return;
1183         }
1184
1185         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1186                  net_dev->name, entry);
1187         if (!entry->active) {
1188                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1189                          net_dev->name);
1190                 spin_unlock_bh(&iface_stat_list_lock);
1191                 return;
1192         }
1193
1194         if (stash_only) {
1195                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1196                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1197                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1198                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1199                 entry->last_known_valid = true;
1200                 IF_DEBUG("qtaguid: %s(%s): "
1201                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1202                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1203                 spin_unlock_bh(&iface_stat_list_lock);
1204                 return;
1205         }
1206         entry->totals[IFS_TX].bytes += stats->tx_bytes;
1207         entry->totals[IFS_TX].packets += stats->tx_packets;
1208         entry->totals[IFS_RX].bytes += stats->rx_bytes;
1209         entry->totals[IFS_RX].packets += stats->rx_packets;
1210         /* We don't need the last_known[] anymore */
1211         entry->last_known_valid = false;
1212         _iface_stat_set_active(entry, net_dev, false);
1213         IF_DEBUG("qtaguid: %s(%s): "
1214                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1215                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1216         spin_unlock_bh(&iface_stat_list_lock);
1217 }
1218
1219 static void tag_stat_update(struct tag_stat *tag_entry,
1220                         enum ifs_tx_rx direction, int proto, int bytes)
1221 {
1222         int active_set;
1223         active_set = get_active_counter_set(tag_entry->tn.tag);
1224         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1225                  "dir=%d proto=%d bytes=%d)\n",
1226                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1227                  active_set, direction, proto, bytes);
1228         data_counters_update(&tag_entry->counters, active_set, direction,
1229                              proto, bytes);
1230         if (tag_entry->parent_counters)
1231                 data_counters_update(tag_entry->parent_counters, active_set,
1232                                      direction, proto, bytes);
1233 }
1234
1235 /*
1236  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1237  * the interface.
1238  * iface_entry->tag_stat_list_lock should be held.
1239  */
1240 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1241                                            tag_t tag)
1242 {
1243         struct tag_stat *new_tag_stat_entry = NULL;
1244         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1245                  " (uid=%u)\n", __func__,
1246                  iface_entry, tag, get_uid_from_tag(tag));
1247         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1248         if (!new_tag_stat_entry) {
1249                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1250                 goto done;
1251         }
1252         new_tag_stat_entry->tn.tag = tag;
1253         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1254 done:
1255         return new_tag_stat_entry;
1256 }
1257
1258 static void if_tag_stat_update(const char *ifname, uid_t uid,
1259                                const struct sock *sk, enum ifs_tx_rx direction,
1260                                int proto, int bytes)
1261 {
1262         struct tag_stat *tag_stat_entry;
1263         tag_t tag, acct_tag;
1264         tag_t uid_tag;
1265         struct data_counters *uid_tag_counters;
1266         struct sock_tag *sock_tag_entry;
1267         struct iface_stat *iface_entry;
1268         struct tag_stat *new_tag_stat = NULL;
1269         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1270                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1271                  ifname, uid, sk, direction, proto, bytes);
1272
1273
1274         iface_entry = get_iface_entry(ifname);
1275         if (!iface_entry) {
1276                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1277                        ifname);
1278                 return;
1279         }
1280         /* It is ok to process data when an iface_entry is inactive */
1281
1282         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1283                  ifname, iface_entry);
1284
1285         /*
1286          * Look for a tagged sock.
1287          * It will carry an acct_tag combined with the owning uid.
1288          */
1289         sock_tag_entry = get_sock_stat(sk);
1290         if (sock_tag_entry) {
1291                 tag = sock_tag_entry->tag;
1292                 acct_tag = get_atag_from_tag(tag);
1293                 uid_tag = get_utag_from_tag(tag);
1294         } else {
1295                 acct_tag = make_atag_from_value(0);
1296                 tag = combine_atag_with_uid(acct_tag, uid);
1297                 uid_tag = make_tag_from_uid(uid);
1298         }
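        /*
         * For reference (layout defined in xt_qtaguid_internal.h): a full
         * tag_t packs the accounting tag in its upper 32 bits and the owning
         * uid in its lower 32 bits, so e.g.
         *
         *	tag      = combine_atag_with_uid(acct_tag, uid);
         *	uid_tag  = make_tag_from_uid(uid);      (acct_tag part is 0)
         *	acct_tag = get_atag_from_tag(tag);
         *	uid      = get_uid_from_tag(tag);
         */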
1299         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1300                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1301                  tag, get_uid_from_tag(tag), iface_entry);
1302         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1303         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1304
1305         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1306                                               tag);
1307         if (tag_stat_entry) {
1308                 /*
1309                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1310                  * {0, uid_tag} will also get updated.
1311                  */
1312                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1313                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1314                 return;
1315         }
1316
1317         /* Loop over tag list under this interface for {0,uid_tag} */
1318         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1319                                               uid_tag);
1320         if (!tag_stat_entry) {
1321                 /* Here: the base uid_tag did not exist */
1322                 /*
1323                  * No parent counters. So
1324          *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1325                  */
1326                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1327                 uid_tag_counters = &new_tag_stat->counters;
1328         } else {
1329                 uid_tag_counters = &tag_stat_entry->counters;
1330         }
1331
1332         if (acct_tag) {
1333                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1334                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1335                 new_tag_stat->parent_counters = uid_tag_counters;
1336         } else {
1337                 /*
1338                  * For new_tag_stat to be still NULL here would require:
1339                  *  {0, uid_tag} exists
1340                  *  and {acct_tag, uid_tag} doesn't exist
1341                  *  AND acct_tag == 0.
1342                  * Impossible. This reassures us that new_tag_stat
1343                  * below will always be assigned.
1344                  */
1345                 BUG_ON(!new_tag_stat);
1346         }
1347         tag_stat_update(new_tag_stat, direction, proto, bytes);
1348         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1349 }
1350
1351 static int iface_netdev_event_handler(struct notifier_block *nb,
1352                                       unsigned long event, void *ptr) {
1353         struct net_device *dev = ptr;
1354
1355         if (unlikely(module_passive))
1356                 return NOTIFY_DONE;
1357
1358         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1359                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1360                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1361
1362         switch (event) {
1363         case NETDEV_UP:
1364                 iface_stat_create(dev, NULL);
1365                 atomic64_inc(&qtu_events.iface_events);
1366                 break;
1367         case NETDEV_DOWN:
1368         case NETDEV_UNREGISTER:
1369                 iface_stat_update(dev, event == NETDEV_DOWN);
1370                 atomic64_inc(&qtu_events.iface_events);
1371                 break;
1372         }
1373         return NOTIFY_DONE;
1374 }
1375
1376 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1377                                          unsigned long event, void *ptr)
1378 {
1379         struct inet6_ifaddr *ifa = ptr;
1380         struct net_device *dev;
1381
1382         if (unlikely(module_passive))
1383                 return NOTIFY_DONE;
1384
1385         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1386                  "ev=0x%lx/%s ifa=%p\n",
1387                  event, netdev_evt_str(event), ifa);
1388
1389         switch (event) {
1390         case NETDEV_UP:
1391                 BUG_ON(!ifa || !ifa->idev);
1392                 dev = (struct net_device *)ifa->idev->dev;
1393                 iface_stat_create_ipv6(dev, ifa);
1394                 atomic64_inc(&qtu_events.iface_events);
1395                 break;
1396         case NETDEV_DOWN:
1397         case NETDEV_UNREGISTER:
1398                 BUG_ON(!ifa || !ifa->idev);
1399                 dev = (struct net_device *)ifa->idev->dev;
1400                 iface_stat_update(dev, event == NETDEV_DOWN);
1401                 atomic64_inc(&qtu_events.iface_events);
1402                 break;
1403         }
1404         return NOTIFY_DONE;
1405 }
1406
1407 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1408                                         unsigned long event, void *ptr)
1409 {
1410         struct in_ifaddr *ifa = ptr;
1411         struct net_device *dev;
1412
1413         if (unlikely(module_passive))
1414                 return NOTIFY_DONE;
1415
1416         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1417                  "ev=0x%lx/%s ifa=%p\n",
1418                  event, netdev_evt_str(event), ifa);
1419
1420         switch (event) {
1421         case NETDEV_UP:
1422                 BUG_ON(!ifa || !ifa->ifa_dev);
1423                 dev = ifa->ifa_dev->dev;
1424                 iface_stat_create(dev, ifa);
1425                 atomic64_inc(&qtu_events.iface_events);
1426                 break;
1427         case NETDEV_DOWN:
1428         case NETDEV_UNREGISTER:
1429                 BUG_ON(!ifa || !ifa->ifa_dev);
1430                 dev = ifa->ifa_dev->dev;
1431                 iface_stat_update(dev, event == NETDEV_DOWN);
1432                 atomic64_inc(&qtu_events.iface_events);
1433                 break;
1434         }
1435         return NOTIFY_DONE;
1436 }
1437
1438 static struct notifier_block iface_netdev_notifier_blk = {
1439         .notifier_call = iface_netdev_event_handler,
1440 };
1441
1442 static struct notifier_block iface_inetaddr_notifier_blk = {
1443         .notifier_call = iface_inetaddr_event_handler,
1444 };
1445
1446 static struct notifier_block iface_inet6addr_notifier_blk = {
1447         .notifier_call = iface_inet6addr_event_handler,
1448 };
1449
1450 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1451 {
1452         int err;
1453
1454         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1455         if (!iface_stat_procdir) {
1456                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1457                 err = -ENOMEM;
1458                 goto err;
1459         }
1460
1461         iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1462                                                     proc_iface_perms,
1463                                                     parent_procdir);
1464         if (!iface_stat_all_procfile) {
1465                 pr_err("qtaguid: iface_stat: init "
1466                        "failed to create stat_all proc entry\n");
1467                 err = -ENOMEM;
1468                 goto err_zap_entry;
1469         }
1470         iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1471
1472
1473         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1474         if (err) {
1475                 pr_err("qtaguid: iface_stat: init "
1476                        "failed to register dev event handler\n");
1477                 goto err_zap_all_stats_entry;
1478         }
1479         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1480         if (err) {
1481                 pr_err("qtaguid: iface_stat: init "
1482                        "failed to register ipv4 dev event handler\n");
1483                 goto err_unreg_nd;
1484         }
1485
1486         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1487         if (err) {
1488                 pr_err("qtaguid: iface_stat: init "
1489                        "failed to register ipv6 dev event handler\n");
1490                 goto err_unreg_ip4_addr;
1491         }
1492         return 0;
1493
1494 err_unreg_ip4_addr:
1495         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1496 err_unreg_nd:
1497         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1498 err_zap_all_stats_entry:
1499         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1500 err_zap_entry:
1501         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1502 err:
1503         return err;
1504 }
1505
1506 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1507                                     struct xt_action_param *par)
1508 {
1509         struct sock *sk;
1510         unsigned int hook_mask = (1 << par->hooknum);
1511
1512         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1513                  par->hooknum, par->family);
1514
1515         /*
1516          * Let's not abuse the xt_socket_get*_sk(), or else it will
1517          * return garbage SKs.
1518          */
1519         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1520                 return NULL;
1521
1522         switch (par->family) {
1523         case NFPROTO_IPV6:
1524                 sk = xt_socket_get6_sk(skb, par);
1525                 break;
1526         case NFPROTO_IPV4:
1527                 sk = xt_socket_get4_sk(skb, par);
1528                 break;
1529         default:
1530                 return NULL;
1531         }
1532
1533         /*
1534          * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1535          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1536          * Not fixed in 3.0-r3 :(
1537          */
1538         if (sk) {
1539                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1540                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1541                 if (sk->sk_state == TCP_TIME_WAIT) {
1542                         xt_socket_put_sk(sk);
1543                         sk = NULL;
1544                 }
1545         }
1546         return sk;
1547 }
1548
1549 static void account_for_uid(const struct sk_buff *skb,
1550                             const struct sock *alternate_sk, uid_t uid,
1551                             struct xt_action_param *par)
1552 {
1553         const struct net_device *el_dev;
1554
1555         if (!skb->dev) {
1556                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1557                 el_dev = par->in ? : par->out;
1558         } else {
1559                 const struct net_device *other_dev;
1560                 el_dev = skb->dev;
1561                 other_dev = par->in ? : par->out;
1562                 if (el_dev != other_dev) {
1563                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1564                                 "par->(in/out)=%p %s\n",
1565                                 par->hooknum, el_dev, el_dev->name, other_dev,
1566                                 other_dev->name);
1567                 }
1568         }
1569
1570         if (unlikely(!el_dev)) {
1571                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1572         } else if (unlikely(!el_dev->name)) {
1573                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1574         } else {
1575                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1576                          par->hooknum,
1577                          el_dev->name,
1578                          el_dev->type);
1579
1580                 if_tag_stat_update(el_dev->name, uid,
1581                                 skb->sk ? skb->sk : alternate_sk,
1582                                 par->in ? IFS_RX : IFS_TX,
1583                                 ip_hdr(skb)->protocol, skb->len);
1584         }
1585 }
1586
1587 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1588 {
1589         const struct xt_qtaguid_match_info *info = par->matchinfo;
1590         const struct file *filp;
1591         bool got_sock = false;
1592         struct sock *sk;
1593         uid_t sock_uid;
1594         bool res;
1595
1596         if (unlikely(module_passive))
1597                 return (info->match ^ info->invert) == 0;
1598
1599         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1600                  par->hooknum, skb, par->in, par->out, par->family);
1601
1602         atomic64_inc(&qtu_events.match_calls);
1603         if (skb == NULL) {
1604                 res = (info->match ^ info->invert) == 0;
1605                 goto ret_res;
1606         }
1607
1608         sk = skb->sk;
1609
1610         if (sk == NULL) {
1611                 /*
1612                  * The skb has no sk attached when packets are in-flight
1613                  * and the matching socket is already closed and gone.
1614                  */
1615                 sk = qtaguid_find_sk(skb, par);
1616                 /*
1617                  * If we got the socket from the find_sk(), we will need to put
1618                  * it back, as nf_tproxy_get_sock_v4() got it.
1619                  */
1620                 got_sock = sk;
1621                 if (sk)
1622                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1623                 else
1624                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1625         } else {
1626                 atomic64_inc(&qtu_events.match_found_sk);
1627         }
1628         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1629                 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1630         if (sk != NULL) {
1631                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1632                         par->hooknum, sk, sk->sk_socket,
1633                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1634                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1635                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1636                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1637         }
1638
1639         if (sk == NULL || sk->sk_socket == NULL) {
1640                 /*
1641                  * Here, the qtaguid_find_sk() using connection tracking
1642                  * couldn't find the owner, so for now we just count them
1643                  * against the system.
1644                  */
1645                 /*
1646                  * TODO: unhack how to force just accounting.
1647                  * For now we only do iface stats when the uid-owner is not
1648                  * requested.
1649                  */
1650                 if (!(info->match & XT_QTAGUID_UID))
1651                         account_for_uid(skb, sk, 0, par);
1652                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1653                         par->hooknum,
1654                         sk ? sk->sk_socket : NULL);
1655                 res = (info->match ^ info->invert) == 0;
1656                 atomic64_inc(&qtu_events.match_no_sk);
1657                 goto put_sock_ret_res;
1658         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1659                 res = false;
1660                 goto put_sock_ret_res;
1661         }
1662         filp = sk->sk_socket->file;
1663         if (filp == NULL) {
1664                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1665                 account_for_uid(skb, sk, 0, par);
1666                 res = ((info->match ^ info->invert) &
1667                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1668                 atomic64_inc(&qtu_events.match_no_sk_file);
1669                 goto put_sock_ret_res;
1670         }
1671         sock_uid = filp->f_cred->fsuid;
1672         /*
1673          * TODO: unhack how to force just accounting.
1674          * For now we only do iface stats when the uid-owner is not requested
1675          */
1676         if (!(info->match & XT_QTAGUID_UID))
1677                 account_for_uid(skb, sk, sock_uid, par);
1678
1679         /*
1680          * The following two tests fail the match when:
1681          *    id not in range AND no inverted condition requested
1682          * or id     in range AND    inverted condition requested
1683          * Thus (!a && b) || (a && !b) == a ^ b
1684          */
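        /*
         * Truth table for the "(id in range) ^ !(inverted)" tests below,
         * where 1 means the branch is taken and the match fails:
         *
         *   in_range  inverted   fail
         *      0         0         1
         *      0         1         0
         *      1         0         0
         *      1         1         1
         */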
1685         if (info->match & XT_QTAGUID_UID)
1686                 if ((filp->f_cred->fsuid >= info->uid_min &&
1687                      filp->f_cred->fsuid <= info->uid_max) ^
1688                     !(info->invert & XT_QTAGUID_UID)) {
1689                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1690                                  par->hooknum);
1691                         res = false;
1692                         goto put_sock_ret_res;
1693                 }
1694         if (info->match & XT_QTAGUID_GID)
1695                 if ((filp->f_cred->fsgid >= info->gid_min &&
1696                      filp->f_cred->fsgid <= info->gid_max) ^
1697                     !(info->invert & XT_QTAGUID_GID)) {
1698                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1699                                 par->hooknum);
1700                         res = false;
1701                         goto put_sock_ret_res;
1702                 }
1703
1704         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1705         res = true;
1706
1707 put_sock_ret_res:
1708         if (got_sock)
1709                 xt_socket_put_sk(sk);
1710 ret_res:
1711         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1712         return res;
1713 }
1714
1715 #ifdef DDEBUG
1716 /* This function is not in xt_qtaguid_print.c because of locks visibility */
1717 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1718 {
1719         va_list args;
1720         char *fmt_buff;
1721         char *buff;
1722
1723         if (likely(!(qtaguid_debug_mask & DDEBUG_MASK)))
1724                 return;
1725
1726         fmt_buff = kasprintf(GFP_ATOMIC,
1727                              "qtaguid: %s(): %s {\n", __func__, fmt);
1728         BUG_ON(!fmt_buff);
1729         va_start(args, fmt);
1730         buff = kvasprintf(GFP_ATOMIC,
1731                           fmt_buff, args);
1732         BUG_ON(!buff);
1733         pr_debug("%s", buff);
1734         kfree(fmt_buff);
1735         kfree(buff);
1736         va_end(args);
1737
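        /*
         * Lock ordering below matches the rest of this file (e.g.
         * ctrl_cmd_tag() and qtudev_release()): sock_tag_list_lock is
         * always taken before uid_tag_data_tree_lock when both are held.
         */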
1738         spin_lock_bh(&sock_tag_list_lock);
1739         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1740         spin_unlock_bh(&sock_tag_list_lock);
1741
1742         spin_lock_bh(&sock_tag_list_lock);
1743         spin_lock_bh(&uid_tag_data_tree_lock);
1744         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1745         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1746         spin_unlock_bh(&uid_tag_data_tree_lock);
1747         spin_unlock_bh(&sock_tag_list_lock);
1748
1749         spin_lock_bh(&iface_stat_list_lock);
1750         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1751         spin_unlock_bh(&iface_stat_list_lock);
1752
1753         pr_debug("qtaguid: %s(): }\n", __func__);
1754 }
1755 #else
1756 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1757 #endif
1758
1759 /*
1760  * Procfs reader to get all active socket tags using style "1)" as described in
1761  * fs/proc/generic.c
1762  */
1763 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1764                                   off_t items_to_skip, int char_count, int *eof,
1765                                   void *data)
1766 {
1767         char *outp = page;
1768         int len;
1769         uid_t uid;
1770         struct rb_node *node;
1771         struct sock_tag *sock_tag_entry;
1772         int item_index = 0;
1773         int indent_level = 0;
1774         long f_count;
1775
1776         if (unlikely(module_passive)) {
1777                 *eof = 1;
1778                 return 0;
1779         }
1780
1781         if (*eof)
1782                 return 0;
1783
1784         CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1785                 page, items_to_skip, char_count, *eof);
1786
1787         spin_lock_bh(&sock_tag_list_lock);
1788         for (node = rb_first(&sock_tag_tree);
1789              node;
1790              node = rb_next(node)) {
1791                 if (item_index++ < items_to_skip)
1792                         continue;
1793                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1794                 uid = get_uid_from_tag(sock_tag_entry->tag);
1795                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1796                          "pid=%u\n",
1797                          sock_tag_entry->sk,
1798                          sock_tag_entry->tag,
1799                          uid,
1800                          sock_tag_entry->pid
1801                         );
1802                 f_count = atomic_long_read(
1803                         &sock_tag_entry->socket->file->f_count);
1804                 len = snprintf(outp, char_count,
1805                                "sock=%p tag=0x%llx (uid=%u) pid=%u "
1806                                "f_count=%lu\n",
1807                                sock_tag_entry->sk,
1808                                sock_tag_entry->tag, uid,
1809                                sock_tag_entry->pid, f_count);
1810                 if (len >= char_count) {
1811                         spin_unlock_bh(&sock_tag_list_lock);
1812                         *outp = '\0';
1813                         return outp - page;
1814                 }
1815                 outp += len;
1816                 char_count -= len;
1817                 (*num_items_returned)++;
1818         }
1819         spin_unlock_bh(&sock_tag_list_lock);
1820
1821         if (item_index++ >= items_to_skip) {
1822                 len = snprintf(outp, char_count,
1823                                "events: sockets_tagged=%llu "
1824                                "sockets_untagged=%llu "
1825                                "counter_set_changes=%llu "
1826                                "delete_cmds=%llu "
1827                                "iface_events=%llu "
1828                                "match_calls=%llu "
1829                                "match_found_sk=%llu "
1830                                "match_found_sk_in_ct=%llu "
1831                                "match_found_no_sk_in_ct=%llu "
1832                                "match_no_sk=%llu "
1833                                "match_no_sk_file=%llu\n",
1834                                atomic64_read(&qtu_events.sockets_tagged),
1835                                atomic64_read(&qtu_events.sockets_untagged),
1836                                atomic64_read(&qtu_events.counter_set_changes),
1837                                atomic64_read(&qtu_events.delete_cmds),
1838                                atomic64_read(&qtu_events.iface_events),
1839                                atomic64_read(&qtu_events.match_calls),
1840                                atomic64_read(&qtu_events.match_found_sk),
1841                                atomic64_read(&qtu_events.match_found_sk_in_ct),
1842                                atomic64_read(
1843                                        &qtu_events.match_found_no_sk_in_ct),
1844                                atomic64_read(&qtu_events.match_no_sk),
1845                                atomic64_read(&qtu_events.match_no_sk_file));
1846                 if (len >= char_count) {
1847                         *outp = '\0';
1848                         return outp - page;
1849                 }
1850                 outp += len;
1851                 char_count -= len;
1852                 (*num_items_returned)++;
1853         }
1854
1855         /* Count the following as part of the last item_index */
1856         if (item_index > items_to_skip) {
1857                 prdebug_full_state(indent_level, "proc ctrl");
1858         }
1859
1860         *eof = 1;
1861         return outp - page;
1862 }
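
/*
 * For reference, a read of the ctrl file yields one line per tagged socket
 * followed by a single "events:" summary line, e.g. (illustrative values
 * only, matching the snprintf() formats above):
 *
 *   sock=ffff88003c1e7000 tag=0x100002715 (uid=10005) pid=1234 f_count=2
 *   events: sockets_tagged=3 sockets_untagged=1 counter_set_changes=0 ...
 */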
1863
1864 /*
1865  * Delete socket tags and stat tags associated with a given
1866  * accounting tag and uid.
1867  */
1868 static int ctrl_cmd_delete(const char *input)
1869 {
1870         char cmd;
1871         uid_t uid;
1872         uid_t entry_uid;
1873         tag_t acct_tag;
1874         tag_t tag;
1875         int res, argc;
1876         struct iface_stat *iface_entry;
1877         struct rb_node *node;
1878         struct sock_tag *st_entry;
1879         struct rb_root st_to_free_tree = RB_ROOT;
1880         struct tag_stat *ts_entry;
1881         struct tag_counter_set *tcs_entry;
1882         struct tag_ref *tr_entry;
1883         struct uid_tag_data *utd_entry;
1884
1885         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1886         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1887                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1888                  acct_tag, uid);
1889         if (argc < 2) {
1890                 res = -EINVAL;
1891                 goto err;
1892         }
1893         if (!valid_atag(acct_tag)) {
1894                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1895                 res = -EINVAL;
1896                 goto err;
1897         }
1898         if (argc < 3) {
1899                 uid = current_fsuid();
1900         } else if (!can_impersonate_uid(uid)) {
1901                 pr_info("qtaguid: ctrl_delete(%s): "
1902                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
1903                         input, current->pid, current->tgid, current_fsuid());
1904                 res = -EPERM;
1905                 goto err;
1906         }
1907
1908         tag = combine_atag_with_uid(acct_tag, uid);
1909         CT_DEBUG("qtaguid: ctrl_delete(%s): "
1910                  "looking for tag=0x%llx (uid=%u)\n",
1911                  input, tag, uid);
1912
1913         /* Delete socket tags */
1914         spin_lock_bh(&sock_tag_list_lock);
1915         node = rb_first(&sock_tag_tree);
1916         while (node) {
1917                 st_entry = rb_entry(node, struct sock_tag, sock_node);
1918                 entry_uid = get_uid_from_tag(st_entry->tag);
1919                 node = rb_next(node);
1920                 if (entry_uid != uid)
1921                         continue;
1922
1923                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
1924                          input, st_entry->tag, entry_uid);
1925
1926                 if (!acct_tag || st_entry->tag == tag) {
1927                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
1928                         /* Can't sockfd_put() within spinlock, do it later. */
1929                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
1930                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1931                         BUG_ON(tr_entry->num_sock_tags <= 0);
1932                         tr_entry->num_sock_tags--;
1933                         /*
1934                          * TODO: remove if, and start failing.
1935                          * This is a hack to work around the fact that in some
1936                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
1937                          * and are trying to work around apps
1938                  * that didn't open /dev/xt_qtaguid.
1939                          */
1940                         if (st_entry->list.next && st_entry->list.prev)
1941                                 list_del(&st_entry->list);
1942                 }
1943         }
1944         spin_unlock_bh(&sock_tag_list_lock);
1945
1946         sock_tag_tree_erase(&st_to_free_tree);
1947
1948         /* Delete tag counter-sets */
1949         spin_lock_bh(&tag_counter_set_list_lock);
1950         /* Counter sets are only on the uid tag, not full tag */
1951         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1952         if (tcs_entry) {
1953                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1954                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1955                          input,
1956                          tcs_entry->tn.tag,
1957                          get_uid_from_tag(tcs_entry->tn.tag),
1958                          tcs_entry->active_set);
1959                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1960                 kfree(tcs_entry);
1961         }
1962         spin_unlock_bh(&tag_counter_set_list_lock);
1963
1964         /*
1965          * If acct_tag is 0, then all entries belonging to uid are
1966          * erased.
1967          */
1968         spin_lock_bh(&iface_stat_list_lock);
1969         list_for_each_entry(iface_entry, &iface_stat_list, list) {
1970                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1971                 node = rb_first(&iface_entry->tag_stat_tree);
1972                 while (node) {
1973                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
1974                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1975                         node = rb_next(node);
1976
1977                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
1978                                  "ts tag=0x%llx (uid=%u)\n",
1979                                  input, ts_entry->tn.tag, entry_uid);
1980
1981                         if (entry_uid != uid)
1982                                 continue;
1983                         if (!acct_tag || ts_entry->tn.tag == tag) {
1984                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1985                                          "erase ts: %s 0x%llx %u\n",
1986                                          input, iface_entry->ifname,
1987                                          get_atag_from_tag(ts_entry->tn.tag),
1988                                          entry_uid);
1989                                 rb_erase(&ts_entry->tn.node,
1990                                          &iface_entry->tag_stat_tree);
1991                                 kfree(ts_entry);
1992                         }
1993                 }
1994                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1995         }
1996         spin_unlock_bh(&iface_stat_list_lock);
1997
1998         /* Cleanup the uid_tag_data */
1999         spin_lock_bh(&uid_tag_data_tree_lock);
2000         node = rb_first(&uid_tag_data_tree);
2001         while (node) {
2002                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2003                 entry_uid = utd_entry->uid;
2004                 node = rb_next(node);
2005
2006                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2007                          "utd uid=%u\n",
2008                          input, entry_uid);
2009
2010                 if (entry_uid != uid)
2011                         continue;
2012                 /*
2013                  * Go over the tag_refs and free those that no
2014                  * sock_tags are using.
2015                  */
2016                 put_tag_ref_tree(tag, utd_entry);
2017                 put_utd_entry(utd_entry);
2018         }
2019         spin_unlock_bh(&uid_tag_data_tree_lock);
2020
2021         atomic64_inc(&qtu_events.delete_cmds);
2022         res = 0;
2023
2024 err:
2025         return res;
2026 }
2027
2028 static int ctrl_cmd_counter_set(const char *input)
2029 {
2030         char cmd;
2031         uid_t uid = 0;
2032         tag_t tag;
2033         int res, argc;
2034         struct tag_counter_set *tcs;
2035         int counter_set;
2036
2037         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2038         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2039                  "set=%d uid=%u\n", input, argc, cmd,
2040                  counter_set, uid);
2041         if (argc != 3) {
2042                 res = -EINVAL;
2043                 goto err;
2044         }
2045         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2046                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2047                         input);
2048                 res = -EINVAL;
2049                 goto err;
2050         }
2051         if (!can_manipulate_uids()) {
2052                 pr_info("qtaguid: ctrl_counterset(%s): "
2053                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2054                         input, current->pid, current->tgid, current_fsuid());
2055                 res = -EPERM;
2056                 goto err;
2057         }
2058
2059         tag = make_tag_from_uid(uid);
2060         spin_lock_bh(&tag_counter_set_list_lock);
2061         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2062         if (!tcs) {
2063                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2064                 if (!tcs) {
2065                         spin_unlock_bh(&tag_counter_set_list_lock);
2066                         pr_err("qtaguid: ctrl_counterset(%s): "
2067                                "failed to alloc counter set\n",
2068                                input);
2069                         res = -ENOMEM;
2070                         goto err;
2071                 }
2072                 tcs->tn.tag = tag;
2073                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2074                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2075                          "(uid=%u) set=%d\n",
2076                          input, tag, get_uid_from_tag(tag), counter_set);
2077         }
2078         tcs->active_set = counter_set;
2079         spin_unlock_bh(&tag_counter_set_list_lock);
2080         atomic64_inc(&qtu_events.counter_set_changes);
2081         res = 0;
2082
2083 err:
2084         return res;
2085 }
2086
2087 static int ctrl_cmd_tag(const char *input)
2088 {
2089         char cmd;
2090         int sock_fd = 0;
2091         uid_t uid = 0;
2092         tag_t acct_tag = make_atag_from_value(0);
2093         tag_t full_tag;
2094         struct socket *el_socket;
2095         int res, argc;
2096         struct sock_tag *sock_tag_entry;
2097         struct tag_ref *tag_ref_entry;
2098         struct uid_tag_data *uid_tag_data_entry;
2099         struct proc_qtu_data *pqd_entry;
2100
2101         /* Unassigned args will get defaulted later. */
2102         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2103         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2104                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2105                  acct_tag, uid);
2106         if (argc < 2) {
2107                 res = -EINVAL;
2108                 goto err;
2109         }
2110         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2111         if (!el_socket) {
2112                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2113                         " sock_fd=%d err=%d\n", input, sock_fd, res);
2114                 goto err;
2115         }
2116         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2117                  input, atomic_long_read(&el_socket->file->f_count),
2118                  el_socket->sk);
2119         if (argc < 3) {
2120                 acct_tag = make_atag_from_value(0);
2121         } else if (!valid_atag(acct_tag)) {
2122                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2123                 res = -EINVAL;
2124                 goto err_put;
2125         }
2126         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2127                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2128                  "in_group=%d in_egroup=%d\n",
2129                  input, current->pid, current->tgid, current_uid(),
2130                  current_euid(), current_fsuid(),
2131                  in_group_p(proc_ctrl_write_gid),
2132                  in_egroup_p(proc_ctrl_write_gid));
2133         if (argc < 4) {
2134                 uid = current_fsuid();
2135         } else if (!can_impersonate_uid(uid)) {
2136                 pr_info("qtaguid: ctrl_tag(%s): "
2137                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2138                         input, current->pid, current->tgid, current_fsuid());
2139                 res = -EPERM;
2140                 goto err_put;
2141         }
2142         full_tag = combine_atag_with_uid(acct_tag, uid);
2143
2144         spin_lock_bh(&sock_tag_list_lock);
2145         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2146         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2147         if (IS_ERR(tag_ref_entry)) {
2148                 res = PTR_ERR(tag_ref_entry);
2149                 spin_unlock_bh(&sock_tag_list_lock);
2150                 goto err_put;
2151         }
2152         tag_ref_entry->num_sock_tags++;
2153         if (sock_tag_entry) {
2154                 struct tag_ref *prev_tag_ref_entry;
2155
2156                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2157                          "st@%p ...->f_count=%ld\n",
2158                          input, el_socket->sk, sock_tag_entry,
2159                          atomic_long_read(&el_socket->file->f_count));
2160                 /*
2161                  * This is a re-tagging, so release the sock_fd that was
2162                  * locked at the time of the 1st tagging.
2163                  * There is still the ref from this call's sockfd_lookup() so
2164                  * it can be done within the spinlock.
2165                  */
2166                 sockfd_put(sock_tag_entry->socket);
2167                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2168                                                     &uid_tag_data_entry);
2169                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2170                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2171                 prev_tag_ref_entry->num_sock_tags--;
2172                 sock_tag_entry->tag = full_tag;
2173         } else {
2174                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2175                          input, el_socket->sk);
2176                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2177                                          GFP_ATOMIC);
2178                 if (!sock_tag_entry) {
2179                         pr_err("qtaguid: ctrl_tag(%s): "
2180                                "socket tag alloc failed\n",
2181                                input);
2182                         spin_unlock_bh(&sock_tag_list_lock);
2183                         res = -ENOMEM;
2184                         goto err_tag_unref_put;
2185                 }
2186                 sock_tag_entry->sk = el_socket->sk;
2187                 sock_tag_entry->socket = el_socket;
2188                 sock_tag_entry->pid = current->tgid;
2189                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2190                                                             uid);
2191                 spin_lock_bh(&uid_tag_data_tree_lock);
2192                 pqd_entry = proc_qtu_data_tree_search(
2193                         &proc_qtu_data_tree, current->tgid);
2194                 /*
2195                  * TODO: remove if, and start failing.
2196                  * At first, we want to catch user-space code that is not
2197                  * opening /dev/xt_qtaguid.
2198                  */
2199                 if (IS_ERR_OR_NULL(pqd_entry))
2200                         pr_warn_once(
2201                                 "qtaguid: %s(): "
2202                                 "User space forgot to open /dev/xt_qtaguid? "
2203                                 "pid=%u tgid=%u uid=%u\n", __func__,
2204                                 current->pid, current->tgid,
2205                                 current_fsuid());
2206                 else
2207                         list_add(&sock_tag_entry->list,
2208                                  &pqd_entry->sock_tag_list);
2209                 spin_unlock_bh(&uid_tag_data_tree_lock);
2210
2211                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2212                 atomic64_inc(&qtu_events.sockets_tagged);
2213         }
2214         spin_unlock_bh(&sock_tag_list_lock);
2215         /* We keep the ref to the socket (file) until it is untagged */
2216         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2217                  input, sock_tag_entry,
2218                  atomic_long_read(&el_socket->file->f_count));
2219         return 0;
2220
2221 err_tag_unref_put:
2222         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2223         tag_ref_entry->num_sock_tags--;
2224         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2225 err_put:
2226         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2227                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2228         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2229         sockfd_put(el_socket);
2230         return res;
2231
2232 err:
2233         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2234         return res;
2235 }
2236
2237 static int ctrl_cmd_untag(const char *input)
2238 {
2239         char cmd;
2240         int sock_fd = 0;
2241         struct socket *el_socket;
2242         int res, argc;
2243         struct sock_tag *sock_tag_entry;
2244         struct tag_ref *tag_ref_entry;
2245         struct uid_tag_data *utd_entry;
2246         struct proc_qtu_data *pqd_entry;
2247
2248         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2249         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2250                  input, argc, cmd, sock_fd);
2251         if (argc < 2) {
2252                 res = -EINVAL;
2253                 goto err;
2254         }
2255         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2256         if (!el_socket) {
2257                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2258                         " sock_fd=%d err=%d\n", input, sock_fd, res);
2259                 goto err;
2260         }
2261         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2262                  input, atomic_long_read(&el_socket->file->f_count),
2263                  el_socket->sk);
2264         spin_lock_bh(&sock_tag_list_lock);
2265         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2266         if (!sock_tag_entry) {
2267                 spin_unlock_bh(&sock_tag_list_lock);
2268                 res = -EINVAL;
2269                 goto err_put;
2270         }
2271         /*
2272          * The socket already belongs to the current process
2273          * so it can do whatever it wants to it.
2274          */
2275         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2276
2277         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2278         BUG_ON(!tag_ref_entry);
2279         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2280         spin_lock_bh(&uid_tag_data_tree_lock);
2281         pqd_entry = proc_qtu_data_tree_search(
2282                 &proc_qtu_data_tree, current->tgid);
2283         /*
2284          * TODO: remove if, and start failing.
2285          * At first, we want to catch user-space code that is not
2286          * opening /dev/xt_qtaguid.
2287          */
2288         if (IS_ERR_OR_NULL(pqd_entry))
2289                 pr_warn_once("qtaguid: %s(): "
2290                              "User space forgot to open /dev/xt_qtaguid? "
2291                              "pid=%u tgid=%u uid=%u\n", __func__,
2292                              current->pid, current->tgid, current_fsuid());
2293         else
2294                 list_del(&sock_tag_entry->list);
2295         spin_unlock_bh(&uid_tag_data_tree_lock);
2296         /*
2297          * We don't free tag_ref from the utd_entry here,
2298          * only during a cmd_delete().
2299          */
2300         tag_ref_entry->num_sock_tags--;
2301         spin_unlock_bh(&sock_tag_list_lock);
2302         /*
2303          * Release the sock_fd that was grabbed at tag time,
2304          * and once more for the sockfd_lookup() here.
2305          */
2306         sockfd_put(sock_tag_entry->socket);
2307         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2308                  input, sock_tag_entry,
2309                  atomic_long_read(&el_socket->file->f_count) - 1);
2310         sockfd_put(el_socket);
2311
2312         kfree(sock_tag_entry);
2313         atomic64_inc(&qtu_events.sockets_untagged);
2314
2315         return 0;
2316
2317 err_put:
2318         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2319                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2320         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2321         sockfd_put(el_socket);
2322         return res;
2323
2324 err:
2325         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2326         return res;
2327 }
2328
2329 static int qtaguid_ctrl_parse(const char *input, int count)
2330 {
2331         char cmd;
2332         int res;
2333
2334         cmd = input[0];
2335         /* Collect params for commands */
2336         switch (cmd) {
2337         case 'd':
2338                 res = ctrl_cmd_delete(input);
2339                 break;
2340
2341         case 's':
2342                 res = ctrl_cmd_counter_set(input);
2343                 break;
2344
2345         case 't':
2346                 res = ctrl_cmd_tag(input);
2347                 break;
2348
2349         case 'u':
2350                 res = ctrl_cmd_untag(input);
2351                 break;
2352
2353         default:
2354                 res = -EINVAL;
2355                 goto err;
2356         }
2357         if (!res)
2358                 res = count;
2359 err:
2360         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2361         return res;
2362 }
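
/*
 * Summary of the ctrl commands dispatched above, with argument formats taken
 * from the sscanf() calls in the respective handlers:
 *
 *   t <sock_fd> [<acct_tag> [<uid>]]   tag a socket
 *   u <sock_fd>                        untag a socket
 *   d <acct_tag> [<uid>]               delete matching tags and stats
 *   s <counter_set> <uid>              select the active counter set
 *
 * A hypothetical shell session (the proc path assumes the default
 * module_procdirname registered under /proc/net):
 *
 *   echo "s 1 10005" > /proc/net/xt_qtaguid/ctrl
 *   echo "d 0 10005" > /proc/net/xt_qtaguid/ctrl
 */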
2363
2364 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2365 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2366                         unsigned long count, void *data)
2367 {
2368         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2369
2370         if (unlikely(module_passive))
2371                 return count;
2372
2373         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2374                 return -EINVAL;
2375
2376         if (copy_from_user(input_buf, buffer, count))
2377                 return -EFAULT;
2378
2379         input_buf[count] = '\0';
2380         return qtaguid_ctrl_parse(input_buf, count);
2381 }
2382
2383 struct proc_print_info {
2384         char *outp;
2385         char **num_items_returned;
2386         struct iface_stat *iface_entry;
2387         struct tag_stat *ts_entry;
2388         int item_index;
2389         int items_to_skip;
2390         int char_count;
2391 };
2392
2393 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2394 {
2395         int len;
2396         struct data_counters *cnts;
2397
2398         if (!ppi->item_index) {
2399                 if (ppi->item_index++ < ppi->items_to_skip)
2400                         return 0;
2401                 len = snprintf(ppi->outp, ppi->char_count,
2402                                "idx iface acct_tag_hex uid_tag_int cnt_set "
2403                                "rx_bytes rx_packets "
2404                                "tx_bytes tx_packets "
2405                                "rx_tcp_bytes rx_tcp_packets "
2406                                "rx_udp_bytes rx_udp_packets "
2407                                "rx_other_bytes rx_other_packets "
2408                                "tx_tcp_bytes tx_tcp_packets "
2409                                "tx_udp_bytes tx_udp_packets "
2410                                "tx_other_bytes tx_other_packets\n");
2411         } else {
2412                 tag_t tag = ppi->ts_entry->tn.tag;
2413                 uid_t stat_uid = get_uid_from_tag(tag);
2414
2415                 if (!can_read_other_uid_stats(stat_uid)) {
2416                         CT_DEBUG("qtaguid: stats line: "
2417                                  "%s 0x%llx %u: insufficient priv "
2418                                  "from pid=%u tgid=%u uid=%u\n",
2419                                  ppi->iface_entry->ifname,
2420                                  get_atag_from_tag(tag), stat_uid,
2421                                  current->pid, current->tgid, current_fsuid());
2422                         return 0;
2423                 }
2424                 if (ppi->item_index++ < ppi->items_to_skip)
2425                         return 0;
2426                 cnts = &ppi->ts_entry->counters;
2427                 len = snprintf(
2428                         ppi->outp, ppi->char_count,
2429                         "%d %s 0x%llx %u %u "
2430                         "%llu %llu "
2431                         "%llu %llu "
2432                         "%llu %llu "
2433                         "%llu %llu "
2434                         "%llu %llu "
2435                         "%llu %llu "
2436                         "%llu %llu "
2437                         "%llu %llu\n",
2438                         ppi->item_index,
2439                         ppi->iface_entry->ifname,
2440                         get_atag_from_tag(tag),
2441                         stat_uid,
2442                         cnt_set,
2443                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
2444                         dc_sum_packets(cnts, cnt_set, IFS_RX),
2445                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
2446                         dc_sum_packets(cnts, cnt_set, IFS_TX),
2447                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2448                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2449                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2450                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2451                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2452                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2453                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2454                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2455                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2456                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2457                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2458                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2459         }
2460         return len;
2461 }
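
/*
 * An illustrative stats line matching the header and format string above
 * (all values made up, shown against the header for readability):
 *
 *   idx iface acct_tag_hex uid_tag_int cnt_set rx_bytes rx_packets ...
 *   2 wlan0 0x100000000 10005 0 1000 10 2000 20 600 6 300 3 100 1 1200 12 500 5 300 3
 *
 * i.e. rx/tx totals first, then the per-protocol (tcp/udp/other) rx and tx
 * byte/packet pairs that sum up to those totals.
 */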
2462
2463 static bool pp_sets(struct proc_print_info *ppi)
2464 {
2465         int len;
2466         int counter_set;
2467         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2468              counter_set++) {
2469                 len = pp_stats_line(ppi, counter_set);
2470                 if (len >= ppi->char_count) {
2471                         *ppi->outp = '\0';
2472                         return false;
2473                 }
2474                 if (len) {
2475                         ppi->outp += len;
2476                         ppi->char_count -= len;
2477                         (*ppi->num_items_returned)++;
2478                 }
2479         }
2480         return true;
2481 }
2482
2483 /*
2484  * Procfs reader to get all tag stats using style "1)" as described in
2485  * fs/proc/generic.c
2486  * Groups all protocols tx/rx bytes.
2487  */
2488 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2489                                 off_t items_to_skip, int char_count, int *eof,
2490                                 void *data)
2491 {
2492         struct proc_print_info ppi;
2493         int len;
2494
2495         ppi.outp = page;
2496         ppi.item_index = 0;
2497         ppi.char_count = char_count;
2498         ppi.num_items_returned = num_items_returned;
2499         ppi.items_to_skip = items_to_skip;
2500
2501         if (unlikely(module_passive)) {
2502                 len = pp_stats_line(&ppi, 0);
2503                 /* The header should always be shorter than the buffer. */
2504                 BUG_ON(len >= ppi.char_count);
2505                 (*num_items_returned)++;
2506                 *eof = 1;
2507                 return len;
2508         }
2509
2510         CT_DEBUG("qtaguid: proc stats page=%p *num_items_returned=%p off=%ld "
2511                 "char_count=%d *eof=%d\n", page, *num_items_returned,
2512                 items_to_skip, char_count, *eof);
2513
2514         if (*eof)
2515                 return 0;
2516
2517         /* The idx is there to help debug when things go belly up. */
2518         len = pp_stats_line(&ppi, 0);
2519         /* Don't advance the outp unless the whole line was printed */
2520         if (len >= ppi.char_count) {
2521                 *ppi.outp = '\0';
2522                 return ppi.outp - page;
2523         }
2524         if (len) {
2525                 ppi.outp += len;
2526                 ppi.char_count -= len;
2527                 (*num_items_returned)++;
2528         }
2529
2530         spin_lock_bh(&iface_stat_list_lock);
2531         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2532                 struct rb_node *node;
2533                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2534                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2535                      node;
2536                      node = rb_next(node)) {
2537                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2538                         if (!pp_sets(&ppi)) {
2539                                 spin_unlock_bh(
2540                                         &ppi.iface_entry->tag_stat_list_lock);
2541                                 spin_unlock_bh(&iface_stat_list_lock);
2542                                 return ppi.outp - page;
2543                         }
2544                 }
2545                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2546         }
2547         spin_unlock_bh(&iface_stat_list_lock);
2548
2549         *eof = 1;
2550         return ppi.outp - page;
2551 }
2552
2553 /*------------------------------------------*/
2554 static int qtudev_open(struct inode *inode, struct file *file)
2555 {
2556         struct uid_tag_data *utd_entry;
2557         struct proc_qtu_data  *pqd_entry;
2558         struct proc_qtu_data  *new_pqd_entry;
2559         int res;
2560         bool utd_entry_found;
2561
2562         if (unlikely(qtu_proc_handling_passive))
2563                 return 0;
2564
2565         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2566                  current->pid, current->tgid, current_fsuid());
2567
2568         spin_lock_bh(&uid_tag_data_tree_lock);
2569
2570         /* Look for existing uid data, or alloc one. */
2571         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2572         if (IS_ERR_OR_NULL(utd_entry)) {
2573                 res = PTR_ERR(utd_entry);
2574                 goto err;
2575         }
2576
2577         /* Look for existing PID based proc_data */
2578         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2579                                               current->tgid);
2580         if (pqd_entry) {
2581                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2582                        "%s already opened\n",
2583                        current->pid, current->tgid, current_fsuid(),
2584                        QTU_DEV_NAME);
2585                 res = -EBUSY;
2586                 goto err_unlock_free_utd;
2587         }
2588
2589         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2590         if (!new_pqd_entry) {
2591                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2592                        "proc data alloc failed\n",
2593                        current->pid, current->tgid, current_fsuid());
2594                 res = -ENOMEM;
2595                 goto err_unlock_free_utd;
2596         }
2597         new_pqd_entry->pid = current->tgid;
2598         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2599         new_pqd_entry->parent_tag_data = utd_entry;
2600         utd_entry->num_pqd++;
2601
2602         proc_qtu_data_tree_insert(new_pqd_entry,
2603                                   &proc_qtu_data_tree);
2604
2605         spin_unlock_bh(&uid_tag_data_tree_lock);
2606         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2607                  current_fsuid(), new_pqd_entry);
2608         file->private_data = new_pqd_entry;
2609         return 0;
2610
2611 err_unlock_free_utd:
2612         if (!utd_entry_found) {
2613                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2614                 kfree(utd_entry);
2615         }
2616         spin_unlock_bh(&uid_tag_data_tree_lock);
2617 err:
2618         return res;
2619 }
2620
2621 static int qtudev_release(struct inode *inode, struct file *file)
2622 {
2623         struct proc_qtu_data  *pqd_entry = file->private_data;
2624         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2625         struct sock_tag *st_entry;
2626         struct rb_root st_to_free_tree = RB_ROOT;
2627         struct list_head *entry, *next;
2628         struct tag_ref *tr;
2629
2630         if (unlikely(qtu_proc_handling_passive))
2631                 return 0;
2632
2633         /*
2634          * Do not trust the current->pid, it might just be a kworker cleaning
2635          * up after a dead proc.
2636          */
2637         DR_DEBUG("qtaguid: qtudev_release(): "
2638                  "pid=%u tgid=%u uid=%u "
2639                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2640                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2641                  pqd_entry, pqd_entry->pid, utd_entry,
2642                  utd_entry->num_active_tags);
2643
2644         spin_lock_bh(&sock_tag_list_lock);
2645         spin_lock_bh(&uid_tag_data_tree_lock);
2646
2647         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2648                 st_entry = list_entry(entry, struct sock_tag, list);
2649                 DR_DEBUG("qtaguid: %s(): "
2650                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2651                          __func__,
2652                          st_entry, st_entry->sk,
2653                          current->pid, current->tgid,
2654                          pqd_entry->parent_tag_data->uid);
2655
2656                 utd_entry = uid_tag_data_tree_search(
2657                         &uid_tag_data_tree,
2658                         get_uid_from_tag(st_entry->tag));
2659                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2660                 DR_DEBUG("qtaguid: %s(): "
2661                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2662                          st_entry->tag, utd_entry);
2663                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2664                                          st_entry->tag);
2665                 BUG_ON(!tr);
2666                 BUG_ON(tr->num_sock_tags <= 0);
2667                 tr->num_sock_tags--;
2668                 free_tag_ref_from_utd_entry(tr, utd_entry);
2669
2670                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2671                 list_del(&st_entry->list);
2672                 /* Can't sockfd_put() within spinlock, do it later. */
2673                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2674
2675                 /*
2676                  * Try to free the utd_entry if no other proc_qtu_data is
2677                  * using it (num_pqd is 0) and it doesn't have active tags
2678                  * (num_active_tags is 0).
2679                  */
2680                 put_utd_entry(utd_entry);
2681         }
2682
2683         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2684         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2685         pqd_entry->parent_tag_data->num_pqd--;
2686         put_utd_entry(pqd_entry->parent_tag_data);
2687         kfree(pqd_entry);
2688         file->private_data = NULL;
2689
2690         spin_unlock_bh(&uid_tag_data_tree_lock);
2691         spin_unlock_bh(&sock_tag_list_lock);
2692
2693
2694         sock_tag_tree_erase(&st_to_free_tree);
2695
2696         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2697                            current->pid, current->tgid);
2698         return 0;
2699 }
2700
2701 /*------------------------------------------*/
2702 static const struct file_operations qtudev_fops = {
2703         .owner = THIS_MODULE,
2704         .open = qtudev_open,
2705         .release = qtudev_release,
2706 };
2707
2708 static struct miscdevice qtu_device = {
2709         .minor = MISC_DYNAMIC_MINOR,
2710         .name = QTU_DEV_NAME,
2711         .fops = &qtudev_fops,
2712         /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2713 };
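
/*
 * Expected userspace flow (a sketch; the paths are assumptions derived from
 * QTU_DEV_NAME and the proc entries registered in qtaguid_proc_register()
 * below):
 *
 *   1. open("/dev/xt_qtaguid", O_RDONLY) and keep the fd for the lifetime of
 *      the process, so qtudev_open()/qtudev_release() can track and clean up
 *      its socket tags (see the pr_warn_once() in ctrl_cmd_tag()).
 *   2. Write commands such as "t <sock_fd> <acct_tag> <uid>" to
 *      /proc/net/xt_qtaguid/ctrl.
 *   3. Read per-tag counters back from /proc/net/xt_qtaguid/stats.
 */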
2714
2715 /*------------------------------------------*/
2716 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2717 {
2718         int ret;
2719         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2720         if (!*res_procdir) {
2721                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2722                 ret = -ENOMEM;
2723                 goto no_dir;
2724         }
2725
2726         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2727                                                 *res_procdir);
2728         if (!xt_qtaguid_ctrl_file) {
2729                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2730                         "file\n");
2731                 ret = -ENOMEM;
2732                 goto no_ctrl_entry;
2733         }
2734         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2735         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2736
2737         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2738                                                 *res_procdir);
2739         if (!xt_qtaguid_stats_file) {
2740                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2741                         "file\n");
2742                 ret = -ENOMEM;
2743                 goto no_stats_entry;
2744         }
2745         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2746         /*
2747          * TODO: add support counter hacking
2748          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2749          */
2750         return 0;
2751
2752 no_stats_entry:
2753         remove_proc_entry("ctrl", *res_procdir);
2754 no_ctrl_entry:
2755         remove_proc_entry("xt_qtaguid", NULL);
2756 no_dir:
2757         return ret;
2758 }
2759
2760 static struct xt_match qtaguid_mt_reg __read_mostly = {
2761         /*
2762          * This module masquerades as the "owner" module so that iptables
2763          * tools can deal with it.
2764          */
2765         .name       = "owner",
2766         .revision   = 1,
2767         .family     = NFPROTO_UNSPEC,
2768         .match      = qtaguid_mt,
2769         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2770         .me         = THIS_MODULE,
2771 };
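
/*
 * Since the match registers itself as "owner" (revision 1), rules are written
 * with the stock iptables owner-match syntax, e.g. (hypothetical rules,
 * assuming the standard owner-match options):
 *
 *   iptables -A OUTPUT -m owner --uid-owner 10005 -j ACCEPT
 *   iptables -A OUTPUT -m owner ! --socket-exists -j DROP
 */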
2772
2773 static int __init qtaguid_mt_init(void)
2774 {
2775         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2776             || iface_stat_init(xt_qtaguid_procdir)
2777             || xt_register_match(&qtaguid_mt_reg)
2778             || misc_register(&qtu_device))
2779                 return -1;
2780         return 0;
2781 }
2782
2783 /*
2784  * TODO: allow unloading of the module.
2785  * For now stats are permanent.
2786  * Kconfig forces 'y/n' and never an 'm'.
2787  */
2788
2789 module_init(qtaguid_mt_init);
2790 MODULE_AUTHOR("jpa <jpa@google.com>");
2791 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2792 MODULE_LICENSE("GPL");
2793 MODULE_ALIAS("ipt_owner");
2794 MODULE_ALIAS("ip6t_owner");
2795 MODULE_ALIAS("ipt_qtaguid");
2796 MODULE_ALIAS("ip6t_qtaguid");