netfilter: xt_qtaguid: fix ipv6 protocol lookup
[firefly-linux-kernel-4.4.55.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
25 #include <net/sock.h>
26 #include <net/tcp.h>
27 #include <net/udp.h>
28
29 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
30 #include <linux/netfilter_ipv6/ip6_tables.h>
31 #endif
32
33 #include <linux/netfilter/xt_socket.h>
34 #include "xt_qtaguid_internal.h"
35 #include "xt_qtaguid_print.h"
36
37 /*
38  * We only use the xt_socket funcs within a similar context to avoid unexpected
39  * return values.
40  */
41 #define XT_SOCKET_SUPPORTED_HOOKS \
42         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
43
44
45 static const char *module_procdirname = "xt_qtaguid";
46 static struct proc_dir_entry *xt_qtaguid_procdir;
47
48 static unsigned int proc_iface_perms = S_IRUGO;
49 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
50
51 static struct proc_dir_entry *xt_qtaguid_stats_file;
52 static unsigned int proc_stats_perms = S_IRUGO;
53 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
54
55 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
56 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
57 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
58 #else
59 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
60 #endif
61 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
62
63 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
64 #include <linux/android_aid.h>
65 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
66 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
67 #else
68 /* 0 means, don't limit anybody */
69 static gid_t proc_stats_readall_gid;
70 static gid_t proc_ctrl_write_gid;
71 #endif
72 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
73                    S_IRUGO | S_IWUSR);
74 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
75                    S_IRUGO | S_IWUSR);
76
77 /*
78  * Limit the number of active tags (via socket tags) for a given UID.
79  * Multiple processes could share the UID.
80  */
81 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
82 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
83
84 /*
85  * After the kernel has initiallized this module, it is still possible
86  * to make it passive.
87  * Setting passive to Y:
88  *  - the iface stats handling will not act on notifications.
89  *  - iptables matches will never match.
90  *  - ctrl commands silently succeed.
91  *  - stats are always empty.
92  * This is mostly usefull when a bug is suspected.
93  */
94 static bool module_passive;
95 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
96
97 /*
98  * Control how qtaguid data is tracked per proc/uid.
99  * Setting tag_tracking_passive to Y:
100  *  - don't create proc specific structs to track tags
101  *  - don't check that active tag stats exceed some limits.
102  *  - don't clean up socket tags on process exits.
103  * This is mostly usefull when a bug is suspected.
104  */
105 static bool qtu_proc_handling_passive;
106 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
107                    S_IRUGO | S_IWUSR);
108
109 #define QTU_DEV_NAME "xt_qtaguid"
110
111 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
112 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
113
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname = "iface_stat";
116 static struct proc_dir_entry *iface_stat_procdir;
117 static const char *iface_stat_all_procfilename = "iface_stat_all";
118 static struct proc_dir_entry *iface_stat_all_procfile;
119
120 /*
121  * Ordering of locks:
122  *  outer locks:
123  *    iface_stat_list_lock
124  *    sock_tag_list_lock
125  *  inner locks:
126  *    uid_tag_data_tree_lock
127  *    tag_counter_set_list_lock
128  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
129  * is acquired.
130  *
131  * Call tree with all lock holders as of 2011-09-25:
132  *
133  * iface_stat_all_proc_read()
134  *   iface_stat_list_lock
135  *     (struct iface_stat)
136  *
137  * qtaguid_ctrl_proc_read()
138  *   sock_tag_list_lock
139  *     (sock_tag_tree)
140  *     (struct proc_qtu_data->sock_tag_list)
141  *   prdebug_full_state()
142  *     sock_tag_list_lock
143  *       (sock_tag_tree)
144  *     uid_tag_data_tree_lock
145  *       (uid_tag_data_tree)
146  *       (proc_qtu_data_tree)
147  *     iface_stat_list_lock
148  *
149  * qtaguid_stats_proc_read()
150  *   iface_stat_list_lock
151  *     struct iface_stat->tag_stat_list_lock
152  *
153  * qtudev_open()
154  *   uid_tag_data_tree_lock
155  *
156  * qtudev_release()
157  *   sock_tag_data_list_lock
158  *     uid_tag_data_tree_lock
159  *   prdebug_full_state()
160  *     sock_tag_list_lock
161  *     uid_tag_data_tree_lock
162  *     iface_stat_list_lock
163  *
164  * iface_netdev_event_handler()
165  *   iface_stat_create()
166  *     iface_stat_list_lock
167  *   iface_stat_update()
168  *     iface_stat_list_lock
169  *
170  * iface_inetaddr_event_handler()
171  *   iface_stat_create()
172  *     iface_stat_list_lock
173  *   iface_stat_update()
174  *     iface_stat_list_lock
175  *
176  * iface_inet6addr_event_handler()
177  *   iface_stat_create_ipv6()
178  *     iface_stat_list_lock
179  *   iface_stat_update()
180  *     iface_stat_list_lock
181  *
182  * qtaguid_mt()
183  *   account_for_uid()
184  *     if_tag_stat_update()
185  *       get_sock_stat()
186  *         sock_tag_list_lock
187  *       struct iface_stat->tag_stat_list_lock
188  *         tag_stat_update()
189  *           get_active_counter_set()
190  *             tag_counter_set_list_lock
191  *         tag_stat_update()
192  *           get_active_counter_set()
193  *             tag_counter_set_list_lock
194  *
195  *
196  * qtaguid_ctrl_parse()
197  *   ctrl_cmd_delete()
198  *     sock_tag_list_lock
199  *     tag_counter_set_list_lock
200  *     iface_stat_list_lock
201  *       struct iface_stat->tag_stat_list_lock
202  *     uid_tag_data_tree_lock
203  *   ctrl_cmd_counter_set()
204  *     tag_counter_set_list_lock
205  *   ctrl_cmd_tag()
206  *     sock_tag_list_lock
207  *       (sock_tag_tree)
208  *       get_tag_ref()
209  *         uid_tag_data_tree_lock
210  *           (uid_tag_data_tree)
211  *       uid_tag_data_tree_lock
212  *         (proc_qtu_data_tree)
213  *   ctrl_cmd_untag()
214  *     sock_tag_list_lock
215  *     uid_tag_data_tree_lock
216  *
217  */
218 static LIST_HEAD(iface_stat_list);
219 static DEFINE_SPINLOCK(iface_stat_list_lock);
220
221 static struct rb_root sock_tag_tree = RB_ROOT;
222 static DEFINE_SPINLOCK(sock_tag_list_lock);
223
224 static struct rb_root tag_counter_set_tree = RB_ROOT;
225 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
226
227 static struct rb_root uid_tag_data_tree = RB_ROOT;
228 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
229
230 static struct rb_root proc_qtu_data_tree = RB_ROOT;
231 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
232
233 static struct qtaguid_event_counts qtu_events;
234 /*----------------------------------------------*/
235 static bool can_manipulate_uids(void)
236 {
237         /* root pwnd */
238         return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
239                 || in_egroup_p(proc_ctrl_write_gid);
240 }
241
242 static bool can_impersonate_uid(uid_t uid)
243 {
244         return uid == current_fsuid() || can_manipulate_uids();
245 }
246
247 static bool can_read_other_uid_stats(uid_t uid)
248 {
249         /* root pwnd */
250         return unlikely(!current_fsuid()) || uid == current_fsuid()
251                 || unlikely(!proc_stats_readall_gid)
252                 || in_egroup_p(proc_stats_readall_gid);
253 }
254
255 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
256                                   enum ifs_tx_rx direction,
257                                   enum ifs_proto ifs_proto,
258                                   int bytes,
259                                   int packets)
260 {
261         counters->bpc[set][direction][ifs_proto].bytes += bytes;
262         counters->bpc[set][direction][ifs_proto].packets += packets;
263 }
264
265 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
266                                     int set,
267                                     enum ifs_tx_rx direction)
268 {
269         return counters->bpc[set][direction][IFS_TCP].bytes
270                 + counters->bpc[set][direction][IFS_UDP].bytes
271                 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
272 }
273
274 static inline uint64_t dc_sum_packets(struct data_counters *counters,
275                                       int set,
276                                       enum ifs_tx_rx direction)
277 {
278         return counters->bpc[set][direction][IFS_TCP].packets
279                 + counters->bpc[set][direction][IFS_UDP].packets
280                 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
281 }
282
283 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
284 {
285         struct rb_node *node = root->rb_node;
286
287         while (node) {
288                 struct tag_node *data = rb_entry(node, struct tag_node, node);
289                 int result;
290                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
291                          " node=%p data=%p\n", tag, node, data);
292                 result = tag_compare(tag, data->tag);
293                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
294                          " data.tag=0x%llx (uid=%u) res=%d\n",
295                          tag, data->tag, get_uid_from_tag(data->tag), result);
296                 if (result < 0)
297                         node = node->rb_left;
298                 else if (result > 0)
299                         node = node->rb_right;
300                 else
301                         return data;
302         }
303         return NULL;
304 }
305
306 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
307 {
308         struct rb_node **new = &(root->rb_node), *parent = NULL;
309
310         /* Figure out where to put new node */
311         while (*new) {
312                 struct tag_node *this = rb_entry(*new, struct tag_node,
313                                                  node);
314                 int result = tag_compare(data->tag, this->tag);
315                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
316                          " (uid=%u)\n", __func__,
317                          this->tag,
318                          get_uid_from_tag(this->tag));
319                 parent = *new;
320                 if (result < 0)
321                         new = &((*new)->rb_left);
322                 else if (result > 0)
323                         new = &((*new)->rb_right);
324                 else
325                         BUG();
326         }
327
328         /* Add new node and rebalance tree. */
329         rb_link_node(&data->node, parent, new);
330         rb_insert_color(&data->node, root);
331 }
332
/* Insert @data into @root, keyed by the tag in its embedded tag_node. */
static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
337
338 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
339 {
340         struct tag_node *node = tag_node_tree_search(root, tag);
341         if (!node)
342                 return NULL;
343         return rb_entry(&node->node, struct tag_stat, tn.node);
344 }
345
/* Insert @data into @root, keyed by the tag in its embedded tag_node. */
static void tag_counter_set_tree_insert(struct tag_counter_set *data,
					struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
351
352 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
353                                                            tag_t tag)
354 {
355         struct tag_node *node = tag_node_tree_search(root, tag);
356         if (!node)
357                 return NULL;
358         return rb_entry(&node->node, struct tag_counter_set, tn.node);
359
360 }
361
/* Insert @data into @root, keyed by the tag in its embedded tag_node. */
static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
366
367 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
368 {
369         struct tag_node *node = tag_node_tree_search(root, tag);
370         if (!node)
371                 return NULL;
372         return rb_entry(&node->node, struct tag_ref, tn.node);
373 }
374
375 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
376                                              const struct sock *sk)
377 {
378         struct rb_node *node = root->rb_node;
379
380         while (node) {
381                 struct sock_tag *data = rb_entry(node, struct sock_tag,
382                                                  sock_node);
383                 if (sk < data->sk)
384                         node = node->rb_left;
385                 else if (sk > data->sk)
386                         node = node->rb_right;
387                 else
388                         return data;
389         }
390         return NULL;
391 }
392
393 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
394 {
395         struct rb_node **new = &(root->rb_node), *parent = NULL;
396
397         /* Figure out where to put new node */
398         while (*new) {
399                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
400                                                  sock_node);
401                 parent = *new;
402                 if (data->sk < this->sk)
403                         new = &((*new)->rb_left);
404                 else if (data->sk > this->sk)
405                         new = &((*new)->rb_right);
406                 else
407                         BUG();
408         }
409
410         /* Add new node and rebalance tree. */
411         rb_link_node(&data->sock_node, parent, new);
412         rb_insert_color(&data->sock_node, root);
413 }
414
415 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
416 {
417         struct rb_node *node;
418         struct sock_tag *st_entry;
419
420         node = rb_first(st_to_free_tree);
421         while (node) {
422                 st_entry = rb_entry(node, struct sock_tag, sock_node);
423                 node = rb_next(node);
424                 CT_DEBUG("qtaguid: %s(): "
425                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
426                          st_entry->sk,
427                          st_entry->tag,
428                          get_uid_from_tag(st_entry->tag));
429                 rb_erase(&st_entry->sock_node, st_to_free_tree);
430                 sockfd_put(st_entry->socket);
431                 kfree(st_entry);
432         }
433 }
434
435 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
436                                                        const pid_t pid)
437 {
438         struct rb_node *node = root->rb_node;
439
440         while (node) {
441                 struct proc_qtu_data *data = rb_entry(node,
442                                                       struct proc_qtu_data,
443                                                       node);
444                 if (pid < data->pid)
445                         node = node->rb_left;
446                 else if (pid > data->pid)
447                         node = node->rb_right;
448                 else
449                         return data;
450         }
451         return NULL;
452 }
453
454 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
455                                       struct rb_root *root)
456 {
457         struct rb_node **new = &(root->rb_node), *parent = NULL;
458
459         /* Figure out where to put new node */
460         while (*new) {
461                 struct proc_qtu_data *this = rb_entry(*new,
462                                                       struct proc_qtu_data,
463                                                       node);
464                 parent = *new;
465                 if (data->pid < this->pid)
466                         new = &((*new)->rb_left);
467                 else if (data->pid > this->pid)
468                         new = &((*new)->rb_right);
469                 else
470                         BUG();
471         }
472
473         /* Add new node and rebalance tree. */
474         rb_link_node(&data->node, parent, new);
475         rb_insert_color(&data->node, root);
476 }
477
478 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
479                                      struct rb_root *root)
480 {
481         struct rb_node **new = &(root->rb_node), *parent = NULL;
482
483         /* Figure out where to put new node */
484         while (*new) {
485                 struct uid_tag_data *this = rb_entry(*new,
486                                                      struct uid_tag_data,
487                                                      node);
488                 parent = *new;
489                 if (data->uid < this->uid)
490                         new = &((*new)->rb_left);
491                 else if (data->uid > this->uid)
492                         new = &((*new)->rb_right);
493                 else
494                         BUG();
495         }
496
497         /* Add new node and rebalance tree. */
498         rb_link_node(&data->node, parent, new);
499         rb_insert_color(&data->node, root);
500 }
501
502 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
503                                                      uid_t uid)
504 {
505         struct rb_node *node = root->rb_node;
506
507         while (node) {
508                 struct uid_tag_data *data = rb_entry(node,
509                                                      struct uid_tag_data,
510                                                      node);
511                 if (uid < data->uid)
512                         node = node->rb_left;
513                 else if (uid > data->uid)
514                         node = node->rb_right;
515                 else
516                         return data;
517         }
518         return NULL;
519 }
520
/*
 * Allocates a new uid_tag_data struct if needed.
 * Returns a pointer to the found or allocated uid_tag_data.
 * Returns a PTR_ERR on failures, and lock is not held.
 * If found is not NULL:
 *   sets *found to true if not allocated.
 *   sets *found to false if allocated.
 */
struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
{
	struct uid_tag_data *utd_entry;

	/* Look for top level uid_tag_data for the UID */
	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);

	/* Pointer-to-bool conversion: true iff an existing entry was found. */
	if (found_res)
		*found_res = utd_entry;
	if (utd_entry)
		return utd_entry;

	/*
	 * GFP_ATOMIC: callers such as get_tag_ref() invoke us while holding
	 * uid_tag_data_tree_lock (a BH-disabling spinlock), so we must not
	 * sleep here.
	 */
	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
	if (!utd_entry) {
		pr_err("qtaguid: get_uid_data(%u): "
		       "tag data alloc failed\n", uid);
		return ERR_PTR(-ENOMEM);
	}

	utd_entry->uid = uid;
	utd_entry->tag_ref_tree = RB_ROOT;
	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
	return utd_entry;
}
555
/*
 * Allocate and insert a tag_ref for @new_tag into @utd_entry's tree.
 * Never returns NULL. Either PTR_ERR or a valid ptr.
 * Fails with -EMFILE when the per-UID active-tag quota (max_sock_tags)
 * would be exceeded, or -ENOMEM on allocation failure.
 * Called under uid_tag_data_tree_lock (see get_tag_ref()).
 */
static struct tag_ref *new_tag_ref(tag_t new_tag,
				   struct uid_tag_data *utd_entry)
{
	struct tag_ref *tr_entry;
	int res;

	/* Enforce the per-UID limit on simultaneously active tags. */
	if (utd_entry->num_active_tags + 1 > max_sock_tags) {
		pr_info("qtaguid: new_tag_ref(0x%llx): "
			"tag ref alloc quota exceeded. max=%d\n",
			new_tag, max_sock_tags);
		res = -EMFILE;
		goto err_res;

	}

	/* GFP_ATOMIC: we run under a BH-disabling spinlock, no sleeping. */
	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
	if (!tr_entry) {
		pr_err("qtaguid: new_tag_ref(0x%llx): "
		       "tag ref alloc failed\n",
		       new_tag);
		res = -ENOMEM;
		goto err_res;
	}
	tr_entry->tn.tag = new_tag;
	/* tr_entry->num_sock_tags  handled by caller */
	utd_entry->num_active_tags++;
	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
		 " inserted new tag ref %p\n",
		 new_tag, tr_entry);
	return tr_entry;

err_res:
	return ERR_PTR(res);
}
592
/*
 * Look up the tag_ref for @full_tag, creating the per-UID uid_tag_data
 * entry on demand. Returns the tag_ref, or NULL when none exists yet
 * (or when get_uid_data() failed). If @utd_res is not NULL it receives
 * the uid_tag_data pointer, or the ERR_PTR from get_uid_data().
 * Called under uid_tag_data_tree_lock (see get_tag_ref()).
 */
static struct tag_ref *lookup_tag_ref(tag_t full_tag,
				      struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;
	bool found_utd;
	uid_t uid = get_uid_from_tag(full_tag);

	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
		 full_tag, uid);

	utd_entry = get_uid_data(uid, &found_utd);
	if (IS_ERR_OR_NULL(utd_entry)) {
		/* Propagate the error (or NULL) through utd_res. */
		if (utd_res)
			*utd_res = utd_entry;
		return NULL;
	}

	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}
618
/*
 * Find-or-create the tag_ref for @full_tag, taking and releasing
 * uid_tag_data_tree_lock around the lookup/creation.
 * Never returns NULL. Either PTR_ERR or a valid ptr.
 * If @utd_res is not NULL it receives the owning uid_tag_data.
 */
static struct tag_ref *get_tag_ref(tag_t full_tag,
				   struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;

	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
		 full_tag);
	spin_lock_bh(&uid_tag_data_tree_lock);
	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
	/* lookup_tag_ref() creates the utd entry on demand; it must exist. */
	BUG_ON(IS_ERR_OR_NULL(utd_entry));
	if (!tr_entry)
		/* May return PTR_ERR (quota exceeded / no memory). */
		tr_entry = new_tag_ref(full_tag, utd_entry);

	spin_unlock_bh(&uid_tag_data_tree_lock);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}
641
/*
 * Checks and maybe frees the UID Tag Data entry.
 * The entry is erased and freed only when it holds no tag refs and no
 * proc_qtu_data references; otherwise it is left in place.
 * NOTE(review): callers presumably hold uid_tag_data_tree_lock since we
 * modify uid_tag_data_tree — confirm at call sites (outside this chunk).
 */
static void put_utd_entry(struct uid_tag_data *utd_entry)
{
	/* Are we done with the UID tag data entry? */
	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
		!utd_entry->num_pqd) {
		DR_DEBUG("qtaguid: %s(): "
			 "erase utd_entry=%p uid=%u "
			 "by pid=%u tgid=%u uid=%u\n", __func__,
			 utd_entry, utd_entry->uid,
			 current->pid, current->tgid, current_fsuid());
		/* An empty tag_ref_tree implies no active tags remain. */
		BUG_ON(utd_entry->num_active_tags);
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
		kfree(utd_entry);
	} else {
		DR_DEBUG("qtaguid: %s(): "
			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
			 __func__, utd_entry, utd_entry->num_active_tags,
			 utd_entry->num_pqd);
		/* Something must still be referencing this entry. */
		BUG_ON(!(utd_entry->num_active_tags ||
			 utd_entry->num_pqd));
	}
}
665
/*
 * If no sock_tags are using this tag_ref,
 * decrements refcount of utd_entry, removes tr_entry
 * from utd_entry->tag_ref_tree and frees.
 * A tr_entry with live sock_tags is left untouched.
 */
static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
					struct uid_tag_data *utd_entry)
{
	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
		 tr_entry, tr_entry->tn.tag,
		 get_uid_from_tag(tr_entry->tn.tag));
	if (!tr_entry->num_sock_tags) {
		/* The utd entry counted this ref; it cannot be zero already. */
		BUG_ON(!utd_entry->num_active_tags);
		utd_entry->num_active_tags--;
		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
		kfree(tr_entry);
	}
}
685
/*
 * Walk @utd_entry's tag_ref_tree and release matching refs that have no
 * sock_tags using them. When @full_tag carries no acct tag component
 * (acct_tag == 0) every unreferenced tag_ref is released; otherwise
 * only the exact @full_tag match is considered.
 */
static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
{
	struct rb_node *node;
	struct tag_ref *tr_entry;
	tag_t acct_tag;

	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
		 full_tag, get_uid_from_tag(full_tag));
	acct_tag = get_atag_from_tag(full_tag);
	node = rb_first(&utd_entry->tag_ref_tree);
	while (node) {
		tr_entry = rb_entry(node, struct tag_ref, tn.node);
		/* Advance before a potential rb_erase() of the current node. */
		node = rb_next(node);
		if (!acct_tag || tr_entry->tn.tag == full_tag)
			free_tag_ref_from_utd_entry(tr_entry, utd_entry);
	}
}
703
/*
 * Legacy procfs read handler: renders the u64 pointed to by @data as
 * "<value>\n" into @page and returns the number of bytes available at
 * @off. Returns 0 when @data is NULL. Sets *@eof when the remainder
 * fits in @count.
 */
static int read_proc_u64(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len;
	uint64_t value;
	char *p = page;
	uint64_t *iface_entry = data;

	if (!data)
		return 0;

	value = *iface_entry;
	p += sprintf(p, "%llu\n", value);
	len = (p - page) - off;
	/*
	 * Fix: an offset past the rendered text used to produce a negative
	 * length, which procfs interprets as an error. Report 0 bytes left.
	 */
	if (len < 0)
		len = 0;
	*eof = (len <= count) ? 1 : 0;
	*start = page + off;
	return len;
}
722
/*
 * Legacy procfs read handler: renders the bool pointed to by @data as
 * "0\n" or "1\n" into @page and returns the number of bytes available
 * at @off. Returns 0 when @data is NULL. Sets *@eof when the remainder
 * fits in @count.
 */
static int read_proc_bool(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len;
	bool value;
	char *p = page;
	bool *bool_entry = data;

	if (!data)
		return 0;

	value = *bool_entry;
	p += sprintf(p, "%u\n", value);
	len = (p - page) - off;
	/*
	 * Fix: an offset past the rendered text used to produce a negative
	 * length, which procfs interprets as an error. Report 0 bytes left.
	 */
	if (len < 0)
		len = 0;
	*eof = (len <= count) ? 1 : 0;
	*start = page + off;
	return len;
}
741
/*
 * Return the counter set currently active for @tag's UID, or 0 when no
 * explicit counter set has been installed. Takes and releases
 * tag_counter_set_list_lock.
 */
static int get_active_counter_set(tag_t tag)
{
	int active_set = 0;
	struct tag_counter_set *tcs;

	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
		 " (uid=%u)\n",
		 tag, get_uid_from_tag(tag));
	/* For now we only handle UID tags for active sets */
	tag = get_utag_from_tag(tag);
	spin_lock_bh(&tag_counter_set_list_lock);
	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	if (tcs)
		active_set = tcs->active_set;
	spin_unlock_bh(&tag_counter_set_list_lock);
	return active_set;
}
759
760 /*
761  * Find the entry for tracking the specified interface.
762  * Caller must hold iface_stat_list_lock
763  */
764 static struct iface_stat *get_iface_entry(const char *ifname)
765 {
766         struct iface_stat *iface_entry;
767
768         /* Find the entry for tracking the specified tag within the interface */
769         if (ifname == NULL) {
770                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
771                 return NULL;
772         }
773
774         /* Iterate over interfaces */
775         list_for_each_entry(iface_entry, &iface_stat_list, list) {
776                 if (!strcmp(ifname, iface_entry->ifname))
777                         goto done;
778         }
779         iface_entry = NULL;
780 done:
781         return iface_entry;
782 }
783
/*
 * Legacy procfs read handler for iface_stat_all.
 * Emits one line per tracked interface:
 *   "<ifname> <active> <rx_bytes> <rx_packets> <tx_bytes> <tx_packets>
 *    <dev_rx_bytes> <dev_rx_packets> <dev_tx_bytes> <dev_tx_packets>"
 * where the first four counters are this module's accumulated totals
 * and the last four come from dev_get_stats() (zeros for inactive
 * interfaces). @items_to_skip/@num_items_returned implement the legacy
 * multi-call read protocol.
 */
static int iface_stat_all_proc_read(char *page, char **num_items_returned,
				    off_t items_to_skip, int char_count,
				    int *eof, void *data)
{
	char *outp = page;
	int item_index = 0;
	int len;
	struct iface_stat *iface_entry;
	struct rtnl_link_stats64 dev_stats, *stats;
	struct rtnl_link_stats64 no_dev_stats = {0};

	/* In passive mode report nothing at all. */
	if (unlikely(module_passive)) {
		*eof = 1;
		return 0;
	}

	CT_DEBUG("qtaguid:proc iface_stat_all "
		 "page=%p *num_items_returned=%p off=%ld "
		 "char_count=%d *eof=%d\n", page, *num_items_returned,
		 items_to_skip, char_count, *eof);

	if (*eof)
		return 0;

	/*
	 * This lock will prevent iface_stat_update() from changing active,
	 * and in turn prevent an interface from unregistering itself.
	 */
	spin_lock_bh(&iface_stat_list_lock);
	list_for_each_entry(iface_entry, &iface_stat_list, list) {
		/* Skip entries already returned by previous read calls. */
		if (item_index++ < items_to_skip)
			continue;

		if (iface_entry->active) {
			stats = dev_get_stats(iface_entry->net_dev,
					      &dev_stats);
		} else {
			/* Inactive iface: report zeroed device counters. */
			stats = &no_dev_stats;
		}
		len = snprintf(outp, char_count,
			       "%s %d "
			       "%llu %llu %llu %llu "
			       "%llu %llu %llu %llu\n",
			       iface_entry->ifname,
			       iface_entry->active,
			       iface_entry->totals[IFS_RX].bytes,
			       iface_entry->totals[IFS_RX].packets,
			       iface_entry->totals[IFS_TX].bytes,
			       iface_entry->totals[IFS_TX].packets,
			       stats->rx_bytes, stats->rx_packets,
			       stats->tx_bytes, stats->tx_packets);
		/*
		 * Truncated line: drop it and return what fit; the caller
		 * re-reads with items_to_skip advanced past complete items.
		 */
		if (len >= char_count) {
			spin_unlock_bh(&iface_stat_list_lock);
			*outp = '\0';
			return outp - page;
		}
		outp += len;
		char_count -= len;
		(*num_items_returned)++;
	}
	spin_unlock_bh(&iface_stat_list_lock);

	*eof = 1;
	return outp - page;
}
849
850 static void iface_create_proc_worker(struct work_struct *work)
851 {
852         struct proc_dir_entry *proc_entry;
853         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
854                                                    iface_work);
855         struct iface_stat *new_iface  = isw->iface_entry;
856
857         /* iface_entries are not deleted, so safe to manipulate. */
858         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
859         if (IS_ERR_OR_NULL(proc_entry)) {
860                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
861                 kfree(isw);
862                 return;
863         }
864
865         new_iface->proc_ptr = proc_entry;
866
867         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
868                         read_proc_u64, &new_iface->totals[IFS_TX].bytes);
869         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
870                         read_proc_u64, &new_iface->totals[IFS_RX].bytes);
871         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
872                         read_proc_u64, &new_iface->totals[IFS_TX].packets);
873         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
874                         read_proc_u64, &new_iface->totals[IFS_RX].packets);
875         create_proc_read_entry("active", proc_iface_perms, proc_entry,
876                         read_proc_bool, &new_iface->active);
877
878         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
879                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
880         kfree(isw);
881 }
882
/*
 * Will set the entry's active state, and
 * update the net_dev accordingly also.
 * Readers (e.g. iface_stat_all_proc_read()) only dereference
 * entry->net_dev when entry->active is set, so on activation the
 * pointer is published before the flag, and on deactivation the flag
 * is cleared before the pointer.  Callers hold iface_stat_list_lock.
 */
static void _iface_stat_set_active(struct iface_stat *entry,
				   struct net_device *net_dev,
				   bool activate)
{
	if (activate) {
		/* Pointer first, flag second (see comment above). */
		entry->net_dev = net_dev;
		entry->active = true;
		IF_DEBUG("qtaguid: %s(%s): "
			 "enable tracking. rfcnt=%d\n", __func__,
			 entry->ifname,
			 __this_cpu_read(*net_dev->pcpu_refcnt));
	} else {
		entry->active = false;
		entry->net_dev = NULL;
		/* Debug uses the net_dev argument, which is still valid
		 * even though entry->net_dev was just cleared. */
		IF_DEBUG("qtaguid: %s(%s): "
			 "disable tracking. rfcnt=%d\n", __func__,
			 entry->ifname,
			 __this_cpu_read(*net_dev->pcpu_refcnt));

	}
}
908
909 /* Caller must hold iface_stat_list_lock */
910 static struct iface_stat *iface_alloc(struct net_device *net_dev)
911 {
912         struct iface_stat *new_iface;
913         struct iface_stat_work *isw;
914
915         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
916         if (new_iface == NULL) {
917                 pr_err("qtaguid: iface_stat: create(%s): "
918                        "iface_stat alloc failed\n", net_dev->name);
919                 return NULL;
920         }
921         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
922         if (new_iface->ifname == NULL) {
923                 pr_err("qtaguid: iface_stat: create(%s): "
924                        "ifname alloc failed\n", net_dev->name);
925                 kfree(new_iface);
926                 return NULL;
927         }
928         spin_lock_init(&new_iface->tag_stat_list_lock);
929         new_iface->tag_stat_tree = RB_ROOT;
930         _iface_stat_set_active(new_iface, net_dev, true);
931
932         /*
933          * ipv6 notifier chains are atomic :(
934          * No create_proc_read_entry() for you!
935          */
936         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
937         if (!isw) {
938                 pr_err("qtaguid: iface_stat: create(%s): "
939                        "work alloc failed\n", new_iface->ifname);
940                 _iface_stat_set_active(new_iface, net_dev, false);
941                 kfree(new_iface->ifname);
942                 kfree(new_iface);
943                 return NULL;
944         }
945         isw->iface_entry = new_iface;
946         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
947         schedule_work(&isw->iface_work);
948         list_add(&new_iface->list, &iface_stat_list);
949         return new_iface;
950 }
951
952 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
953                                                struct iface_stat *iface)
954 {
955         struct rtnl_link_stats64 dev_stats, *stats;
956         bool stats_rewound;
957
958         stats = dev_get_stats(net_dev, &dev_stats);
959         /* No empty packets */
960         stats_rewound =
961                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
962                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
963
964         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
965                  "bytes rx/tx=%llu/%llu "
966                  "active=%d last_known=%d "
967                  "stats_rewound=%d\n", __func__,
968                  net_dev ? net_dev->name : "?",
969                  iface, net_dev,
970                  stats->rx_bytes, stats->tx_bytes,
971                  iface->active, iface->last_known_valid, stats_rewound);
972
973         if (iface->active && iface->last_known_valid && stats_rewound) {
974                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
975                              "iface reset its stats unexpectedly\n", __func__,
976                              net_dev->name);
977
978                 iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
979                 iface->totals[IFS_TX].packets +=
980                         iface->last_known[IFS_TX].packets;
981                 iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
982                 iface->totals[IFS_RX].packets +=
983                         iface->last_known[IFS_RX].packets;
984                 iface->last_known_valid = false;
985                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
986                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
987                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
988                          iface->last_known[IFS_TX].bytes);
989         }
990 }
991
/*
 * Create a new entry for tracking the specified interface.
 * Do nothing if the entry already exists.
 * Called when an interface is configured with a valid IP address
 * (NETDEV_UP on the inetaddr chain, or the netdev chain with ifa=NULL,
 * in which case the device's own IPv4 address list is scanned).
 * Loopback addresses are tracked as inactive / ignored.
 */
static void iface_stat_create(struct net_device *net_dev,
			      struct in_ifaddr *ifa)
{
	struct in_device *in_dev = NULL;
	const char *ifname;
	struct iface_stat *entry;
	__be32 ipaddr = 0;
	struct iface_stat *new_iface;

	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
		 net_dev ? net_dev->name : "?",
		 ifa, net_dev);
	if (!net_dev) {
		pr_err("qtaguid: iface_stat: create(): no net dev\n");
		return;
	}

	ifname = net_dev->name;
	if (!ifa) {
		/* No address supplied (netdev event): take a reference on
		 * the IPv4 in_device and look for an address whose label
		 * matches the device name.  Released at done_put. */
		in_dev = in_dev_get(net_dev);
		if (!in_dev) {
			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
			       ifname);
			return;
		}
		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
			 ifname, in_dev);
		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
			IF_DEBUG("qtaguid: iface_stat: create(%s): "
				 "ifa=%p ifa_label=%s\n",
				 ifname, ifa,
				 ifa->ifa_label ? ifa->ifa_label : "(null)");
			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
				break;
		}
	}

	if (!ifa) {
		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
			 ifname);
		goto done_put;
	}
	ipaddr = ifa->ifa_local;

	/* Lock prevents concurrent create/update of the same entry. */
	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(ifname);
	if (entry != NULL) {
		/* Existing entry: re-activate (unless loopback) after
		 * checking whether the device reset its counters. */
		bool activate = !ipv4_is_loopback(ipaddr);
		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
			 ifname, entry);
		iface_check_stats_reset_and_adjust(net_dev, entry);
		_iface_stat_set_active(entry, net_dev, activate);
		IF_DEBUG("qtaguid: %s(%s): "
			 "tracking now %d on ip=%pI4\n", __func__,
			 entry->ifname, activate, &ipaddr);
		goto done_unlock_put;
	} else if (ipv4_is_loopback(ipaddr)) {
		IF_DEBUG("qtaguid: iface_stat: create(%s): "
			 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
		goto done_unlock_put;
	}

	/* New non-loopback interface: allocate under the list lock. */
	new_iface = iface_alloc(net_dev);
	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
done_unlock_put:
	spin_unlock_bh(&iface_stat_list_lock);
done_put:
	if (in_dev)
		in_dev_put(in_dev);
}
1068
/*
 * IPv6 counterpart of iface_stat_create(): create or re-activate the
 * tracking entry for @net_dev when an IPv6 address event arrives.
 * Loopback addresses deactivate / are ignored.
 */
static void iface_stat_create_ipv6(struct net_device *net_dev,
				   struct inet6_ifaddr *ifa)
{
	struct in_device *in_dev;
	const char *ifname;
	struct iface_stat *entry;
	struct iface_stat *new_iface;
	int addr_type;

	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
		 ifa, net_dev, net_dev ? net_dev->name : "");
	if (!net_dev) {
		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
		return;
	}
	ifname = net_dev->name;

	/*
	 * NOTE(review): this takes a reference on the *IPv4* in_device
	 * during an IPv6 address event; it is only used here as an
	 * existence check (and released at done_put).  Confirm it is
	 * intentional that IPv6-only devices without an in_device bail
	 * out early.
	 */
	in_dev = in_dev_get(net_dev);
	if (!in_dev) {
		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
		       ifname);
		return;
	}

	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
		 ifname, in_dev);

	if (!ifa) {
		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
			 ifname);
		goto done_put;
	}
	addr_type = ipv6_addr_type(&ifa->addr);

	/* Lock prevents concurrent create/update of the same entry. */
	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(ifname);
	if (entry != NULL) {
		/* Existing entry: re-activate (unless loopback) after
		 * checking whether the device reset its counters. */
		bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
			 ifname, entry);
		iface_check_stats_reset_and_adjust(net_dev, entry);
		_iface_stat_set_active(entry, net_dev, activate);
		IF_DEBUG("qtaguid: %s(%s): "
			 "tracking now %d on ip=%pI6c\n", __func__,
			 entry->ifname, activate, &ifa->addr);
		goto done_unlock_put;
	} else if (addr_type & IPV6_ADDR_LOOPBACK) {
		IF_DEBUG("qtaguid: %s(%s): "
			 "ignore loopback dev. ip=%pI6c\n", __func__,
			 ifname, &ifa->addr);
		goto done_unlock_put;
	}

	new_iface = iface_alloc(net_dev);
	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);

done_unlock_put:
	spin_unlock_bh(&iface_stat_list_lock);
done_put:
	in_dev_put(in_dev);
}
1131
1132 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1133 {
1134         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1135         return sock_tag_tree_search(&sock_tag_tree, sk);
1136 }
1137
1138 static struct sock_tag *get_sock_stat(const struct sock *sk)
1139 {
1140         struct sock_tag *sock_tag_entry;
1141         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1142         if (!sk)
1143                 return NULL;
1144         spin_lock_bh(&sock_tag_list_lock);
1145         sock_tag_entry = get_sock_stat_nl(sk);
1146         spin_unlock_bh(&sock_tag_list_lock);
1147         return sock_tag_entry;
1148 }
1149
1150 static void
1151 data_counters_update(struct data_counters *dc, int set,
1152                      enum ifs_tx_rx direction, int proto, int bytes)
1153 {
1154         switch (proto) {
1155         case IPPROTO_TCP:
1156                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1157                 break;
1158         case IPPROTO_UDP:
1159                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1160                 break;
1161         case IPPROTO_IP:
1162         default:
1163                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1164                                     1);
1165                 break;
1166         }
1167 }
1168
/*
 * Update stats for the specified interface. Do nothing if the entry
 * does not exist (when a device was never configured with an IP address).
 * Called when an device is being unregistered.
 * With stash_only (the NETDEV_DOWN case, see the notifier handlers) the
 * current device counters are only snapshotted into last_known[] so a
 * later counter reset can be detected; otherwise (NETDEV_UNREGISTER)
 * they are folded into totals[] and the entry is deactivated.
 */
static void iface_stat_update(struct net_device *net_dev, bool stash_only)
{
	struct rtnl_link_stats64 dev_stats, *stats;
	struct iface_stat *entry;

	stats = dev_get_stats(net_dev, &dev_stats);
	/* Holding the lock also blocks iface_stat_all_proc_read() from
	 * seeing a half-updated entry. */
	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(net_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
			 net_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 net_dev->name, entry);
	if (!entry->active) {
		/* Already deactivated (e.g. DOWN followed by UNREGISTER). */
		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
			 net_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	if (stash_only) {
		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
		entry->last_known[IFS_TX].packets = stats->tx_packets;
		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
		entry->last_known[IFS_RX].packets = stats->rx_packets;
		entry->last_known_valid = true;
		IF_DEBUG("qtaguid: %s(%s): "
			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}
	entry->totals[IFS_TX].bytes += stats->tx_bytes;
	entry->totals[IFS_TX].packets += stats->tx_packets;
	entry->totals[IFS_RX].bytes += stats->rx_bytes;
	entry->totals[IFS_RX].packets += stats->rx_packets;
	/* We don't need the last_known[] anymore */
	entry->last_known_valid = false;
	_iface_stat_set_active(entry, net_dev, false);
	IF_DEBUG("qtaguid: %s(%s): "
		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
	spin_unlock_bh(&iface_stat_list_lock);
}
1222
1223 static void tag_stat_update(struct tag_stat *tag_entry,
1224                         enum ifs_tx_rx direction, int proto, int bytes)
1225 {
1226         int active_set;
1227         active_set = get_active_counter_set(tag_entry->tn.tag);
1228         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1229                  "dir=%d proto=%d bytes=%d)\n",
1230                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1231                  active_set, direction, proto, bytes);
1232         data_counters_update(&tag_entry->counters, active_set, direction,
1233                              proto, bytes);
1234         if (tag_entry->parent_counters)
1235                 data_counters_update(tag_entry->parent_counters, active_set,
1236                                      direction, proto, bytes);
1237 }
1238
1239 /*
1240  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1241  * the interface.
1242  * iface_entry->tag_stat_list_lock should be held.
1243  */
1244 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1245                                            tag_t tag)
1246 {
1247         struct tag_stat *new_tag_stat_entry = NULL;
1248         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1249                  " (uid=%u)\n", __func__,
1250                  iface_entry, tag, get_uid_from_tag(tag));
1251         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1252         if (!new_tag_stat_entry) {
1253                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1254                 goto done;
1255         }
1256         new_tag_stat_entry->tn.tag = tag;
1257         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1258 done:
1259         return new_tag_stat_entry;
1260 }
1261
1262 static void if_tag_stat_update(const char *ifname, uid_t uid,
1263                                const struct sock *sk, enum ifs_tx_rx direction,
1264                                int proto, int bytes)
1265 {
1266         struct tag_stat *tag_stat_entry;
1267         tag_t tag, acct_tag;
1268         tag_t uid_tag;
1269         struct data_counters *uid_tag_counters;
1270         struct sock_tag *sock_tag_entry;
1271         struct iface_stat *iface_entry;
1272         struct tag_stat *new_tag_stat = NULL;
1273         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1274                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1275                  ifname, uid, sk, direction, proto, bytes);
1276
1277
1278         iface_entry = get_iface_entry(ifname);
1279         if (!iface_entry) {
1280                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1281                        ifname);
1282                 return;
1283         }
1284         /* It is ok to process data when an iface_entry is inactive */
1285
1286         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1287                  ifname, iface_entry);
1288
1289         /*
1290          * Look for a tagged sock.
1291          * It will have an acct_uid.
1292          */
1293         sock_tag_entry = get_sock_stat(sk);
1294         if (sock_tag_entry) {
1295                 tag = sock_tag_entry->tag;
1296                 acct_tag = get_atag_from_tag(tag);
1297                 uid_tag = get_utag_from_tag(tag);
1298         } else {
1299                 acct_tag = make_atag_from_value(0);
1300                 tag = combine_atag_with_uid(acct_tag, uid);
1301                 uid_tag = make_tag_from_uid(uid);
1302         }
1303         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1304                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1305                  tag, get_uid_from_tag(tag), iface_entry);
1306         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1307         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1308
1309         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1310                                               tag);
1311         if (tag_stat_entry) {
1312                 /*
1313                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1314                  * {0, uid_tag} will also get updated.
1315                  */
1316                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1317                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1318                 return;
1319         }
1320
1321         /* Loop over tag list under this interface for {0,uid_tag} */
1322         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1323                                               uid_tag);
1324         if (!tag_stat_entry) {
1325                 /* Here: the base uid_tag did not exist */
1326                 /*
1327                  * No parent counters. So
1328                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1329                  */
1330                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1331                 uid_tag_counters = &new_tag_stat->counters;
1332         } else {
1333                 uid_tag_counters = &tag_stat_entry->counters;
1334         }
1335
1336         if (acct_tag) {
1337                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1338                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1339                 new_tag_stat->parent_counters = uid_tag_counters;
1340         } else {
1341                 /*
1342                  * For new_tag_stat to be still NULL here would require:
1343                  *  {0, uid_tag} exists
1344                  *  and {acct_tag, uid_tag} doesn't exist
1345                  *  AND acct_tag == 0.
1346                  * Impossible. This reassures us that new_tag_stat
1347                  * below will always be assigned.
1348                  */
1349                 BUG_ON(!new_tag_stat);
1350         }
1351         tag_stat_update(new_tag_stat, direction, proto, bytes);
1352         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1353 }
1354
1355 static int iface_netdev_event_handler(struct notifier_block *nb,
1356                                       unsigned long event, void *ptr) {
1357         struct net_device *dev = ptr;
1358
1359         if (unlikely(module_passive))
1360                 return NOTIFY_DONE;
1361
1362         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1363                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1364                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1365
1366         switch (event) {
1367         case NETDEV_UP:
1368                 iface_stat_create(dev, NULL);
1369                 atomic64_inc(&qtu_events.iface_events);
1370                 break;
1371         case NETDEV_DOWN:
1372         case NETDEV_UNREGISTER:
1373                 iface_stat_update(dev, event == NETDEV_DOWN);
1374                 atomic64_inc(&qtu_events.iface_events);
1375                 break;
1376         }
1377         return NOTIFY_DONE;
1378 }
1379
1380 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1381                                          unsigned long event, void *ptr)
1382 {
1383         struct inet6_ifaddr *ifa = ptr;
1384         struct net_device *dev;
1385
1386         if (unlikely(module_passive))
1387                 return NOTIFY_DONE;
1388
1389         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1390                  "ev=0x%lx/%s ifa=%p\n",
1391                  event, netdev_evt_str(event), ifa);
1392
1393         switch (event) {
1394         case NETDEV_UP:
1395                 BUG_ON(!ifa || !ifa->idev);
1396                 dev = (struct net_device *)ifa->idev->dev;
1397                 iface_stat_create_ipv6(dev, ifa);
1398                 atomic64_inc(&qtu_events.iface_events);
1399                 break;
1400         case NETDEV_DOWN:
1401         case NETDEV_UNREGISTER:
1402                 BUG_ON(!ifa || !ifa->idev);
1403                 dev = (struct net_device *)ifa->idev->dev;
1404                 iface_stat_update(dev, event == NETDEV_DOWN);
1405                 atomic64_inc(&qtu_events.iface_events);
1406                 break;
1407         }
1408         return NOTIFY_DONE;
1409 }
1410
1411 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1412                                         unsigned long event, void *ptr)
1413 {
1414         struct in_ifaddr *ifa = ptr;
1415         struct net_device *dev;
1416
1417         if (unlikely(module_passive))
1418                 return NOTIFY_DONE;
1419
1420         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1421                  "ev=0x%lx/%s ifa=%p\n",
1422                  event, netdev_evt_str(event), ifa);
1423
1424         switch (event) {
1425         case NETDEV_UP:
1426                 BUG_ON(!ifa || !ifa->ifa_dev);
1427                 dev = ifa->ifa_dev->dev;
1428                 iface_stat_create(dev, ifa);
1429                 atomic64_inc(&qtu_events.iface_events);
1430                 break;
1431         case NETDEV_DOWN:
1432         case NETDEV_UNREGISTER:
1433                 BUG_ON(!ifa || !ifa->ifa_dev);
1434                 dev = ifa->ifa_dev->dev;
1435                 iface_stat_update(dev, event == NETDEV_DOWN);
1436                 atomic64_inc(&qtu_events.iface_events);
1437                 break;
1438         }
1439         return NOTIFY_DONE;
1440 }
1441
/* Registered with register_netdevice_notifier() in iface_stat_init(). */
static struct notifier_block iface_netdev_notifier_blk = {
	.notifier_call = iface_netdev_event_handler,
};

/* Registered with register_inetaddr_notifier() (IPv4 address events). */
static struct notifier_block iface_inetaddr_notifier_blk = {
	.notifier_call = iface_inetaddr_event_handler,
};

/* Registered with register_inet6addr_notifier() (IPv6 address events). */
static struct notifier_block iface_inet6addr_notifier_blk = {
	.notifier_call = iface_inet6addr_event_handler,
};
1453
1454 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1455 {
1456         int err;
1457
1458         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1459         if (!iface_stat_procdir) {
1460                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1461                 err = -1;
1462                 goto err;
1463         }
1464
1465         iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1466                                                     proc_iface_perms,
1467                                                     parent_procdir);
1468         if (!iface_stat_all_procfile) {
1469                 pr_err("qtaguid: iface_stat: init "
1470                        " failed to create stat_all proc entry\n");
1471                 err = -1;
1472                 goto err_zap_entry;
1473         }
1474         iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1475
1476
1477         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1478         if (err) {
1479                 pr_err("qtaguid: iface_stat: init "
1480                        "failed to register dev event handler\n");
1481                 goto err_zap_all_stats_entry;
1482         }
1483         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1484         if (err) {
1485                 pr_err("qtaguid: iface_stat: init "
1486                        "failed to register ipv4 dev event handler\n");
1487                 goto err_unreg_nd;
1488         }
1489
1490         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1491         if (err) {
1492                 pr_err("qtaguid: iface_stat: init "
1493                        "failed to register ipv6 dev event handler\n");
1494                 goto err_unreg_ip4_addr;
1495         }
1496         return 0;
1497
1498 err_unreg_ip4_addr:
1499         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1500 err_unreg_nd:
1501         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1502 err_zap_all_stats_entry:
1503         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1504 err_zap_entry:
1505         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1506 err:
1507         return err;
1508 }
1509
/*
 * Look up the socket owning @skb via the xt_socket helpers.
 * Only called for PRE_ROUTING/LOCAL_IN (XT_SOCKET_SUPPORTED_HOOKS);
 * other hooks return NULL to avoid bogus results from xt_socket.
 * A non-NULL return carries a reference taken by xt_socket_get*_sk();
 * the caller must release it with xt_socket_put_sk().
 * TCP_TIME_WAIT sockets are filtered out here (see comment below).
 */
static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
				    struct xt_action_param *par)
{
	struct sock *sk;
	unsigned int hook_mask = (1 << par->hooknum);

	MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
		 par->hooknum, par->family);

	/*
	 * Let's not abuse the the xt_socket_get*_sk(), or else it will
	 * return garbage SKs.
	 */
	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
		return NULL;

	switch (par->family) {
	case NFPROTO_IPV6:
		sk = xt_socket_get6_sk(skb, par);
		break;
	case NFPROTO_IPV4:
		sk = xt_socket_get4_sk(skb, par);
		break;
	default:
		return NULL;
	}

	/*
	 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
	 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
	 * Not fixed in 3.0-r3 :(
	 */
	if (sk) {
		MT_DEBUG("qtaguid: %p->sk_proto=%u "
			 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
		if (sk->sk_state  == TCP_TIME_WAIT) {
			/* Drop our reference before discarding the sock. */
			xt_socket_put_sk(sk);
			sk = NULL;
		}
	}
	return sk;
}
1552
/*
 * Determine the transport protocol of @skb for accounting.
 * For IPv6, walk the extension-header chain with ipv6_find_hdr()
 * (target -1 returns the last, i.e. transport, protocol) instead of
 * reading nexthdr directly, which may name an extension header.
 * Returns the protocol number, IPPROTO_RAW for unknown families, or a
 * negative value when the IPv6 transport header cannot be found; the
 * accounting path (data_counters_update()) buckets anything that is
 * not TCP/UDP as IFS_PROTO_OTHER, so a negative value is harmless.
 */
static int ipx_proto(const struct sk_buff *skb,
		     struct xt_action_param *par)
{
	int thoff = 0, tproto;

	switch (par->family) {
	case NFPROTO_IPV6:
		tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
		if (tproto < 0)
			MT_DEBUG("%s(): transport header not found in ipv6"
				 " skb=%p\n", __func__, skb);
		break;
	case NFPROTO_IPV4:
		tproto = ip_hdr(skb)->protocol;
		break;
	default:
		tproto = IPPROTO_RAW;
	}
	return tproto;
}
1573
/*
 * Account skb->len bytes against @uid on the device the packet used.
 * Prefers skb->dev; falls back to the hook's par->in/par->out device
 * when skb->dev is unset.  Direction comes from the hook: par->in set
 * means receive (IFS_RX), otherwise transmit (IFS_TX).
 * @alternate_sk is used when the skb itself carries no socket (e.g. a
 * sock obtained via qtaguid_find_sk()).
 */
static void account_for_uid(const struct sk_buff *skb,
			    const struct sock *alternate_sk, uid_t uid,
			    struct xt_action_param *par)
{
	const struct net_device *el_dev;

	if (!skb->dev) {
		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
		el_dev = par->in ? : par->out;
	} else {
		const struct net_device *other_dev;
		el_dev = skb->dev;
		other_dev = par->in ? : par->out;
		if (el_dev != other_dev) {
			/* Mismatch is only logged; skb->dev wins. */
			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
				"par->(in/out)=%p %s\n",
				par->hooknum, el_dev, el_dev->name, other_dev,
				other_dev->name);
		}
	}

	if (unlikely(!el_dev)) {
		pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
	} else if (unlikely(!el_dev->name)) {
		pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
	} else {
		int proto = ipx_proto(skb, par);
		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
			 par->hooknum, el_dev->name, el_dev->type,
			 par->family, proto);

		if_tag_stat_update(el_dev->name, uid,
				skb->sk ? skb->sk : alternate_sk,
				par->in ? IFS_RX : IFS_TX,
				proto, skb->len);
	}
}
1611
/*
 * Main xtables match function.
 *
 * Decides whether @skb matches the uid/gid/socket constraints in
 * par->matchinfo and, as a side effect, accounts the packet's bytes to
 * the owning uid's per-interface stats (via account_for_uid()).
 *
 * Return value follows the usual xtables convention: the raw predicate
 * is XORed with info->invert so "! --uid-owner" style rules work.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_qtaguid_match_info *info = par->matchinfo;
	const struct file *filp;
	bool got_sock = false;
	struct sock *sk;
	uid_t sock_uid;
	bool res;

	/* Passive mode: no accounting; behave as a no-condition match. */
	if (unlikely(module_passive))
		return (info->match ^ info->invert) == 0;

	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
		 par->hooknum, skb, par->in, par->out, par->family);

	atomic64_inc(&qtu_events.match_calls);
	if (skb == NULL) {
		res = (info->match ^ info->invert) == 0;
		goto ret_res;
	}

	sk = skb->sk;

	if (sk == NULL) {
		/*
		 * A missing sk->sk_socket happens when packets are in-flight
		 * and the matching socket is already closed and gone.
		 */
		sk = qtaguid_find_sk(skb, par);
		/*
		 * If we got the socket from the find_sk(), we will need to put
		 * it back, as nf_tproxy_get_sock_v4() got it.
		 */
		got_sock = sk;
		if (sk)
			atomic64_inc(&qtu_events.match_found_sk_in_ct);
		else
			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
	} else {
		atomic64_inc(&qtu_events.match_found_sk);
	}
	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
	if (sk != NULL) {
		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
			par->hooknum, sk, sk->sk_socket,
			sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
			par->hooknum, filp ? filp->f_cred->fsuid : -1);
	}

	if (sk == NULL || sk->sk_socket == NULL) {
		/*
		 * Here, the qtaguid_find_sk() using connection tracking
		 * couldn't find the owner, so for now we just count them
		 * against the system.
		 */
		/*
		 * TODO: unhack how to force just accounting.
		 * For now we only do iface stats when the uid-owner is not
		 * requested.
		 */
		if (!(info->match & XT_QTAGUID_UID))
			account_for_uid(skb, sk, 0, par);
		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
			par->hooknum,
			sk ? sk->sk_socket : NULL);
		res = (info->match ^ info->invert) == 0;
		atomic64_inc(&qtu_events.match_no_sk);
		goto put_sock_ret_res;
	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
		/* Rule wants "no socket" but we have one: fail the match. */
		res = false;
		goto put_sock_ret_res;
	}
	filp = sk->sk_socket->file;
	if (filp == NULL) {
		/* Socket with no backing file: account as uid 0 and only
		 * let non-uid/gid conditions decide the match. */
		MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
		account_for_uid(skb, sk, 0, par);
		res = ((info->match ^ info->invert) &
			(XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
		atomic64_inc(&qtu_events.match_no_sk_file);
		goto put_sock_ret_res;
	}
	sock_uid = filp->f_cred->fsuid;
	/*
	 * TODO: unhack how to force just accounting.
	 * For now we only do iface stats when the uid-owner is not requested
	 */
	if (!(info->match & XT_QTAGUID_UID))
		account_for_uid(skb, sk, sock_uid, par);

	/*
	 * The following two tests fail the match when:
	 *    id not in range AND no inverted condition requested
	 * or id     in range AND    inverted condition requested
	 * Thus (!a && b) || (a && !b) == a ^ b
	 */
	if (info->match & XT_QTAGUID_UID)
		if ((filp->f_cred->fsuid >= info->uid_min &&
		     filp->f_cred->fsuid <= info->uid_max) ^
		    !(info->invert & XT_QTAGUID_UID)) {
			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
				 par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	if (info->match & XT_QTAGUID_GID)
		if ((filp->f_cred->fsgid >= info->gid_min &&
				filp->f_cred->fsgid <= info->gid_max) ^
			!(info->invert & XT_QTAGUID_GID)) {
			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
				par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}

	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
	res = true;

put_sock_ret_res:
	/* Only drop the ref we took ourselves via qtaguid_find_sk(). */
	if (got_sock)
		xt_socket_put_sk(sk);
ret_res:
	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
	return res;
}
1739
#ifdef DDEBUG
/* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the module's full runtime state (sock tags, uid tag data,
 * proc qtu data, iface stats) to the kernel log, gated on DDEBUG_MASK.
 * Each tree is printed under the lock(s) that protect it.
 */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
{
	va_list args;
	char *fmt_buff;
	char *buff;

	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
		return;

	/*
	 * Two-pass header: first embed the caller's format string into a
	 * fixed prefix, then expand the caller's varargs against it.
	 */
	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	BUG_ON(!fmt_buff);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
			  fmt_buff, args);
	BUG_ON(!buff);
	pr_debug("%s", buff);
	kfree(fmt_buff);
	kfree(buff);
	va_end(args);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
	spin_unlock_bh(&sock_tag_list_lock);

	/* uid/proc data need both locks, taken in this order. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
/* No-op stub when DDEBUG state dumps are compiled out. */
static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
#endif
1783
1784 /*
1785  * Procfs reader to get all active socket tags using style "1)" as described in
1786  * fs/proc/generic.c
1787  */
/*
 * Old-style procfs read handler for the ctrl file.
 *
 * Emits one line per tagged socket, then a summary "events:" line with
 * the global counters.  @items_to_skip is the caller's resume offset
 * (item-granular, per fs/proc/generic.c "1)" style); output stops early
 * (without setting *eof) when a line would not fit in @char_count, so
 * the caller re-reads from the last completed item.
 */
static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
				  off_t items_to_skip, int char_count, int *eof,
				  void *data)
{
	char *outp = page;
	int len;
	uid_t uid;
	struct rb_node *node;
	struct sock_tag *sock_tag_entry;
	int item_index = 0;
	int indent_level = 0;
	long f_count;

	if (unlikely(module_passive)) {
		*eof = 1;
		return 0;
	}

	if (*eof)
		return 0;

	CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
		page, items_to_skip, char_count, *eof);

	spin_lock_bh(&sock_tag_list_lock);
	for (node = rb_first(&sock_tag_tree);
	     node;
	     node = rb_next(node)) {
		/* Skip items already returned in a previous read() call. */
		if (item_index++ < items_to_skip)
			continue;
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		uid = get_uid_from_tag(sock_tag_entry->tag);
		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
			 "pid=%u\n",
			 sock_tag_entry->sk,
			 sock_tag_entry->tag,
			 uid,
			 sock_tag_entry->pid
			);
		f_count = atomic_long_read(
			&sock_tag_entry->socket->file->f_count);
		len = snprintf(outp, char_count,
			       "sock=%p tag=0x%llx (uid=%u) pid=%u "
			       "f_count=%lu\n",
			       sock_tag_entry->sk,
			       sock_tag_entry->tag, uid,
			       sock_tag_entry->pid, f_count);
		/* Line didn't fit: drop the lock, truncate, and return what
		 * we have so far; the next read resumes at this item. */
		if (len >= char_count) {
			spin_unlock_bh(&sock_tag_list_lock);
			*outp = '\0';
			return outp - page;
		}
		outp += len;
		char_count -= len;
		(*num_items_returned)++;
	}
	spin_unlock_bh(&sock_tag_list_lock);

	/* The events summary counts as one more item. */
	if (item_index++ >= items_to_skip) {
		len = snprintf(outp, char_count,
			       "events: sockets_tagged=%llu "
			       "sockets_untagged=%llu "
			       "counter_set_changes=%llu "
			       "delete_cmds=%llu "
			       "iface_events=%llu "
			       "match_calls=%llu "
			       "match_found_sk=%llu "
			       "match_found_sk_in_ct=%llu "
			       "match_found_no_sk_in_ct=%llu "
			       "match_no_sk=%llu "
			       "match_no_sk_file=%llu\n",
			       atomic64_read(&qtu_events.sockets_tagged),
			       atomic64_read(&qtu_events.sockets_untagged),
			       atomic64_read(&qtu_events.counter_set_changes),
			       atomic64_read(&qtu_events.delete_cmds),
			       atomic64_read(&qtu_events.iface_events),
			       atomic64_read(&qtu_events.match_calls),
			       atomic64_read(&qtu_events.match_found_sk),
			       atomic64_read(&qtu_events.match_found_sk_in_ct),
			       atomic64_read(
				       &qtu_events.match_found_no_sk_in_ct),
			       atomic64_read(&qtu_events.match_no_sk),
			       atomic64_read(&qtu_events.match_no_sk_file));
		if (len >= char_count) {
			*outp = '\0';
			return outp - page;
		}
		outp += len;
		char_count -= len;
		(*num_items_returned)++;
	}

	/* Count the following as part of the last item_index */
	if (item_index > items_to_skip) {
		prdebug_full_state(indent_level, "proc ctrl");
	}

	*eof = 1;
	return outp - page;
}
1888
1889 /*
1890  * Delete socket tags, and stat tags associated with a given
1891  * accouting tag and uid.
1892  */
/*
 * Handle the "d <acct_tag> [<uid>]" control command.
 *
 * Removes, for the given uid: matching socket tags (acct_tag == 0 means
 * all of the uid's tags), the uid's counter-set entry, the per-interface
 * tag_stat entries, and finally the uid_tag_data bookkeeping.  When uid
 * is omitted the caller's fsuid is used; impersonating another uid
 * requires privilege.  Returns 0 or a negative errno.
 */
static int ctrl_cmd_delete(const char *input)
{
	char cmd;
	uid_t uid;
	uid_t entry_uid;
	tag_t acct_tag;
	tag_t tag;
	int res, argc;
	struct iface_stat *iface_entry;
	struct rb_node *node;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct tag_stat *ts_entry;
	struct tag_counter_set *tcs_entry;
	struct tag_ref *tr_entry;
	struct uid_tag_data *utd_entry;

	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
		 acct_tag, uid);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
		res = -EINVAL;
		goto err;
	}
	if (argc < 3) {
		/* uid omitted: operate on the caller's own tags. */
		uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_delete(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());
		res = -EPERM;
		goto err;
	}

	tag = combine_atag_with_uid(acct_tag, uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): "
		 "looking for tag=0x%llx (uid=%u)\n",
		 input, tag, uid);

	/* Delete socket tags */
	spin_lock_bh(&sock_tag_list_lock);
	node = rb_first(&sock_tag_tree);
	while (node) {
		st_entry = rb_entry(node, struct sock_tag, sock_node);
		entry_uid = get_uid_from_tag(st_entry->tag);
		/* Advance before a possible rb_erase() of this node. */
		node = rb_next(node);
		if (entry_uid != uid)
			continue;

		CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
			 input, st_entry->tag, entry_uid);

		if (!acct_tag || st_entry->tag == tag) {
			rb_erase(&st_entry->sock_node, &sock_tag_tree);
			/* Can't sockfd_put() within spinlock, do it later. */
			sock_tag_tree_insert(st_entry, &st_to_free_tree);
			tr_entry = lookup_tag_ref(st_entry->tag, NULL);
			BUG_ON(tr_entry->num_sock_tags <= 0);
			tr_entry->num_sock_tags--;
			/*
			 * TODO: remove if, and start failing.
			 * This is a hack to work around the fact that in some
			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
			 * and are trying to work around apps
			 * that didn't open the /dev/xt_qtaguid.
			 */
			if (st_entry->list.next && st_entry->list.prev)
				list_del(&st_entry->list);
		}
	}
	spin_unlock_bh(&sock_tag_list_lock);

	/* Drops socket refs and frees the entries collected above. */
	sock_tag_tree_erase(&st_to_free_tree);

	/* Delete tag counter-sets */
	spin_lock_bh(&tag_counter_set_list_lock);
	/* Counter sets are only on the uid tag, not full tag */
	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	if (tcs_entry) {
		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
			 input,
			 tcs_entry->tn.tag,
			 get_uid_from_tag(tcs_entry->tn.tag),
			 tcs_entry->active_set);
		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
		kfree(tcs_entry);
	}
	spin_unlock_bh(&tag_counter_set_list_lock);

	/*
	 * If acct_tag is 0, then all entries belonging to uid are
	 * erased.
	 */
	spin_lock_bh(&iface_stat_list_lock);
	list_for_each_entry(iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&iface_entry->tag_stat_list_lock);
		node = rb_first(&iface_entry->tag_stat_tree);
		while (node) {
			ts_entry = rb_entry(node, struct tag_stat, tn.node);
			entry_uid = get_uid_from_tag(ts_entry->tn.tag);
			node = rb_next(node);

			CT_DEBUG("qtaguid: ctrl_delete(%s): "
				 "ts tag=0x%llx (uid=%u)\n",
				 input, ts_entry->tn.tag, entry_uid);

			if (entry_uid != uid)
				continue;
			if (!acct_tag || ts_entry->tn.tag == tag) {
				CT_DEBUG("qtaguid: ctrl_delete(%s): "
					 "erase ts: %s 0x%llx %u\n",
					 input, iface_entry->ifname,
					 get_atag_from_tag(ts_entry->tn.tag),
					 entry_uid);
				rb_erase(&ts_entry->tn.node,
					 &iface_entry->tag_stat_tree);
				kfree(ts_entry);
			}
		}
		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
	}
	spin_unlock_bh(&iface_stat_list_lock);

	/* Cleanup the uid_tag_data */
	spin_lock_bh(&uid_tag_data_tree_lock);
	node = rb_first(&uid_tag_data_tree);
	while (node) {
		utd_entry = rb_entry(node, struct uid_tag_data, node);
		entry_uid = utd_entry->uid;
		node = rb_next(node);

		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "utd uid=%u\n",
			 input, entry_uid);

		if (entry_uid != uid)
			continue;
		/*
		 * Go over the tag_refs, and those that don't have
		 * sock_tags using them are freed.
		 */
		put_tag_ref_tree(tag, utd_entry);
		put_utd_entry(utd_entry);
	}
	spin_unlock_bh(&uid_tag_data_tree_lock);

	atomic64_inc(&qtu_events.delete_cmds);
	res = 0;

err:
	return res;
}
2052
2053 static int ctrl_cmd_counter_set(const char *input)
2054 {
2055         char cmd;
2056         uid_t uid = 0;
2057         tag_t tag;
2058         int res, argc;
2059         struct tag_counter_set *tcs;
2060         int counter_set;
2061
2062         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2063         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2064                  "set=%d uid=%u\n", input, argc, cmd,
2065                  counter_set, uid);
2066         if (argc != 3) {
2067                 res = -EINVAL;
2068                 goto err;
2069         }
2070         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2071                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2072                         input);
2073                 res = -EINVAL;
2074                 goto err;
2075         }
2076         if (!can_manipulate_uids()) {
2077                 pr_info("qtaguid: ctrl_counterset(%s): "
2078                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2079                         input, current->pid, current->tgid, current_fsuid());
2080                 res = -EPERM;
2081                 goto err;
2082         }
2083
2084         tag = make_tag_from_uid(uid);
2085         spin_lock_bh(&tag_counter_set_list_lock);
2086         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2087         if (!tcs) {
2088                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2089                 if (!tcs) {
2090                         spin_unlock_bh(&tag_counter_set_list_lock);
2091                         pr_err("qtaguid: ctrl_counterset(%s): "
2092                                "failed to alloc counter set\n",
2093                                input);
2094                         res = -ENOMEM;
2095                         goto err;
2096                 }
2097                 tcs->tn.tag = tag;
2098                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2099                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2100                          "(uid=%u) set=%d\n",
2101                          input, tag, get_uid_from_tag(tag), counter_set);
2102         }
2103         tcs->active_set = counter_set;
2104         spin_unlock_bh(&tag_counter_set_list_lock);
2105         atomic64_inc(&qtu_events.counter_set_changes);
2106         res = 0;
2107
2108 err:
2109         return res;
2110 }
2111
/*
 * Handle the "t <sock_fd> [<acct_tag> [<uid>]]" control command.
 *
 * Tags the socket referenced by sock_fd with (acct_tag, uid), either by
 * retagging an existing sock_tag entry or allocating a new one.  The
 * file reference taken by sockfd_lookup() is kept for the lifetime of
 * the tag (released on untag/delete); on a retag the previous tag-time
 * reference is dropped instead.  Returns 0 or a negative errno.
 */
static int ctrl_cmd_tag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	uid_t uid = 0;
	tag_t acct_tag = make_atag_from_value(0);
	tag_t full_tag;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *uid_tag_data_entry;
	struct proc_qtu_data *pqd_entry;

	/* Unassigned args will get defaulted later. */
	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
		 acct_tag, uid);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
			" sock_fd=%d err=%d\n", input, sock_fd, res);
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
		 el_socket->sk);
	if (argc < 3) {
		/* acct_tag omitted: use the zero accounting tag. */
		acct_tag = make_atag_from_value(0);
	} else if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
		res = -EINVAL;
		goto err_put;
	}
	CT_DEBUG("qtaguid: ctrl_tag(%s): "
		 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
		 "in_group=%d in_egroup=%d\n",
		 input, current->pid, current->tgid, current_uid(),
		 current_euid(), current_fsuid(),
		 in_group_p(proc_ctrl_write_gid),
		 in_egroup_p(proc_ctrl_write_gid));
	if (argc < 4) {
		/* uid omitted: tag on behalf of the caller. */
		uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_tag(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());
		res = -EPERM;
		goto err_put;
	}
	full_tag = combine_atag_with_uid(acct_tag, uid);

	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	/* Take a ref on the (uid, tag) pair before committing the tag. */
	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
	if (IS_ERR(tag_ref_entry)) {
		res = PTR_ERR(tag_ref_entry);
		spin_unlock_bh(&sock_tag_list_lock);
		goto err_put;
	}
	tag_ref_entry->num_sock_tags++;
	if (sock_tag_entry) {
		struct tag_ref *prev_tag_ref_entry;

		CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
			 "st@%p ...->f_count=%ld\n",
			 input, el_socket->sk, sock_tag_entry,
			 atomic_long_read(&el_socket->file->f_count));
		/*
		 * This is a re-tagging, so release the sock_fd that was
		 * locked at the time of the 1st tagging.
		 * There is still the ref from this call's sockfd_lookup() so
		 * it can be done within the spinlock.
		 */
		sockfd_put(sock_tag_entry->socket);
		prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
						    &uid_tag_data_entry);
		BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
		BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
		prev_tag_ref_entry->num_sock_tags--;
		sock_tag_entry->tag = full_tag;
	} else {
		CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
			 input, el_socket->sk);
		sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
					 GFP_ATOMIC);
		if (!sock_tag_entry) {
			pr_err("qtaguid: ctrl_tag(%s): "
			       "socket tag alloc failed\n",
			       input);
			spin_unlock_bh(&sock_tag_list_lock);
			res = -ENOMEM;
			goto err_tag_unref_put;
		}
		sock_tag_entry->sk = el_socket->sk;
		sock_tag_entry->socket = el_socket;
		sock_tag_entry->pid = current->tgid;
		sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
							    uid);
		spin_lock_bh(&uid_tag_data_tree_lock);
		pqd_entry = proc_qtu_data_tree_search(
			&proc_qtu_data_tree, current->tgid);
		/*
		 * TODO: remove if, and start failing.
		 * At first, we want to catch user-space code that is not
		 * opening the /dev/xt_qtaguid.
		 */
		if (IS_ERR_OR_NULL(pqd_entry))
			pr_warn_once(
				"qtaguid: %s(): "
				"User space forgot to open /dev/xt_qtaguid? "
				"pid=%u tgid=%u uid=%u\n", __func__,
				current->pid, current->tgid,
				current_fsuid());
		else
			list_add(&sock_tag_entry->list,
				 &pqd_entry->sock_tag_list);
		spin_unlock_bh(&uid_tag_data_tree_lock);

		sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
		atomic64_inc(&qtu_events.sockets_tagged);
	}
	spin_unlock_bh(&sock_tag_list_lock);
	/* We keep the ref to the socket (file) until it is untagged */
	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count));
	return 0;

err_tag_unref_put:
	/* Undo the tag_ref taken above before dropping the socket ref. */
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	tag_ref_entry->num_sock_tags--;
	free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
err_put:
	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
	return res;
}
2261
/*
 * Handle the "u <sock_fd>" control command: remove the tag previously
 * attached to the given socket by the tag command.
 * Returns 0 on success or a negative errno.
 */
static int ctrl_cmd_untag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;

	/* Expected input format: "u <sock_fd>" */
	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
		 input, argc, cmd, sock_fd);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
			" sock_fd=%d err=%d\n", input, sock_fd, res);
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
		 el_socket->sk);
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		/* Socket was never tagged, or was already untagged. */
		spin_unlock_bh(&sock_tag_list_lock);
		res = -EINVAL;
		goto err_put;
	}
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	/* uid_tag_data_tree_lock nests inside sock_tag_list_lock here. */
	spin_lock_bh(&uid_tag_data_tree_lock);
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, current->tgid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry))
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u uid=%u\n", __func__,
			     current->pid, current->tgid, current_fsuid());
	else
		list_del(&sock_tag_entry->list);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sock_fd that was grabbed at tag time,
	 * and once more for the sockfd_lookup() here.
	 */
	sockfd_put(sock_tag_entry->socket);
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count) - 1);
	sockfd_put(el_socket);

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	return 0;

err_put:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
	return res;
}
2353
2354 static int qtaguid_ctrl_parse(const char *input, int count)
2355 {
2356         char cmd;
2357         int res;
2358
2359         cmd = input[0];
2360         /* Collect params for commands */
2361         switch (cmd) {
2362         case 'd':
2363                 res = ctrl_cmd_delete(input);
2364                 break;
2365
2366         case 's':
2367                 res = ctrl_cmd_counter_set(input);
2368                 break;
2369
2370         case 't':
2371                 res = ctrl_cmd_tag(input);
2372                 break;
2373
2374         case 'u':
2375                 res = ctrl_cmd_untag(input);
2376                 break;
2377
2378         default:
2379                 res = -EINVAL;
2380                 goto err;
2381         }
2382         if (!res)
2383                 res = count;
2384 err:
2385         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2386         return res;
2387 }
2388
2389 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2390 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2391                         unsigned long count, void *data)
2392 {
2393         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2394
2395         if (unlikely(module_passive))
2396                 return count;
2397
2398         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2399                 return -EINVAL;
2400
2401         if (copy_from_user(input_buf, buffer, count))
2402                 return -EFAULT;
2403
2404         input_buf[count] = '\0';
2405         return qtaguid_ctrl_parse(input_buf, count);
2406 }
2407
/*
 * Cursor state shared by the stats procfs printing helpers
 * (qtaguid_stats_proc_read() / pp_sets() / pp_stats_line()).
 */
struct proc_print_info {
	char *outp;			/* next write position in the output page */
	char **num_items_returned;	/* procfs "items returned" out-param */
	struct iface_stat *iface_entry;	/* interface currently being printed */
	struct tag_stat *ts_entry;	/* tag-stat node currently being printed */
	int item_index;			/* running line counter; 0 = header not yet printed */
	int items_to_skip;		/* lines already returned by earlier reads */
	int char_count;			/* space remaining in the output page */
};
2417
/*
 * Print a single stats line for the given counter set into ppi->outp
 * (or the column header on the very first call, when item_index is 0).
 * Returns the snprintf() length — which may be >= ppi->char_count when
 * the page is too small; the caller checks for that — or 0 when the
 * line is skipped (already returned by an earlier read, or the caller
 * lacks permission to read this uid's stats).
 */
static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
{
	int len;
	struct data_counters *cnts;

	if (!ppi->item_index) {
		/* First line: the header, unless a previous read() already
		 * returned it (items_to_skip covers it). */
		if (ppi->item_index++ < ppi->items_to_skip)
			return 0;
		len = snprintf(ppi->outp, ppi->char_count,
			       "idx iface acct_tag_hex uid_tag_int cnt_set "
			       "rx_bytes rx_packets "
			       "tx_bytes tx_packets "
			       "rx_tcp_bytes rx_tcp_packets "
			       "rx_udp_bytes rx_udp_packets "
			       "rx_other_bytes rx_other_packets "
			       "tx_tcp_bytes tx_tcp_packets "
			       "tx_udp_bytes tx_udp_packets "
			       "tx_other_bytes tx_other_packets\n");
	} else {
		tag_t tag = ppi->ts_entry->tn.tag;
		uid_t stat_uid = get_uid_from_tag(tag);

		/* Hide other uids' stats from unprivileged readers. */
		if (!can_read_other_uid_stats(stat_uid)) {
			CT_DEBUG("qtaguid: stats line: "
				 "%s 0x%llx %u: insufficient priv "
				 "from pid=%u tgid=%u uid=%u\n",
				 ppi->iface_entry->ifname,
				 get_atag_from_tag(tag), stat_uid,
				 current->pid, current->tgid, current_fsuid());
			return 0;
		}
		if (ppi->item_index++ < ppi->items_to_skip)
			return 0;
		cnts = &ppi->ts_entry->counters;
		/* NOTE: this line format is parsed by userspace; any change
		 * here is an ABI change. */
		len = snprintf(
			ppi->outp, ppi->char_count,
			"%d %s 0x%llx %u %u "
			"%llu %llu "
			"%llu %llu "
			"%llu %llu "
			"%llu %llu "
			"%llu %llu "
			"%llu %llu "
			"%llu %llu "
			"%llu %llu\n",
			ppi->item_index,
			ppi->iface_entry->ifname,
			get_atag_from_tag(tag),
			stat_uid,
			cnt_set,
			dc_sum_bytes(cnts, cnt_set, IFS_RX),
			dc_sum_packets(cnts, cnt_set, IFS_RX),
			dc_sum_bytes(cnts, cnt_set, IFS_TX),
			dc_sum_packets(cnts, cnt_set, IFS_TX),
			cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
			cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
			cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
			cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
			cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
			cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
			cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
			cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
			cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
			cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
			cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
			cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
	}
	return len;
}
2487
2488 static bool pp_sets(struct proc_print_info *ppi)
2489 {
2490         int len;
2491         int counter_set;
2492         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2493              counter_set++) {
2494                 len = pp_stats_line(ppi, counter_set);
2495                 if (len >= ppi->char_count) {
2496                         *ppi->outp = '\0';
2497                         return false;
2498                 }
2499                 if (len) {
2500                         ppi->outp += len;
2501                         ppi->char_count -= len;
2502                         (*ppi->num_items_returned)++;
2503                 }
2504         }
2505         return true;
2506 }
2507
/*
 * Procfs reader to get all tag stats using style "1)" as described in
 * fs/proc/generic.c
 * Groups all protocols tx/rx bytes.
 *
 * A large stats set spans multiple read() calls; items_to_skip tells us
 * how many lines earlier calls already returned so we can resume.
 */
static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
				off_t items_to_skip, int char_count, int *eof,
				void *data)
{
	struct proc_print_info ppi;
	int len;

	ppi.outp = page;
	ppi.item_index = 0;
	ppi.char_count = char_count;
	ppi.num_items_returned = num_items_returned;
	ppi.items_to_skip = items_to_skip;

	if (unlikely(module_passive)) {
		/* Passive mode: return only the header line. */
		len = pp_stats_line(&ppi, 0);
		/* The header should always be shorter than the buffer. */
		BUG_ON(len >= ppi.char_count);
		(*num_items_returned)++;
		*eof = 1;
		return len;
	}

	CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
		"char_count=%d *eof=%d\n", page, *num_items_returned,
		items_to_skip, char_count, *eof);

	if (*eof)
		return 0;

	/* The idx is there to help debug when things go belly up. */
	len = pp_stats_line(&ppi, 0);
	/* Don't advance the outp unless the whole line was printed */
	if (len >= ppi.char_count) {
		*ppi.outp = '\0';
		return ppi.outp - page;
	}
	if (len) {
		ppi.outp += len;
		ppi.char_count -= len;
		(*num_items_returned)++;
	}

	/* Walk every interface, then every tag_stat node within it.
	 * Lock order: iface_stat_list_lock, then the per-iface
	 * tag_stat_list_lock. */
	spin_lock_bh(&iface_stat_list_lock);
	list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
		struct rb_node *node;
		spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
		for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
		     node;
		     node = rb_next(node)) {
			ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
			if (!pp_sets(&ppi)) {
				/* Page full: stop here; the next read()
				 * resumes via items_to_skip. */
				spin_unlock_bh(
					&ppi.iface_entry->tag_stat_list_lock);
				spin_unlock_bh(&iface_stat_list_lock);
				return ppi.outp - page;
			}
		}
		spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
	}
	spin_unlock_bh(&iface_stat_list_lock);

	*eof = 1;
	return ppi.outp - page;
}
2577
2578 /*------------------------------------------*/
2579 static int qtudev_open(struct inode *inode, struct file *file)
2580 {
2581         struct uid_tag_data *utd_entry;
2582         struct proc_qtu_data  *pqd_entry;
2583         struct proc_qtu_data  *new_pqd_entry;
2584         int res;
2585         bool utd_entry_found;
2586
2587         if (unlikely(qtu_proc_handling_passive))
2588                 return 0;
2589
2590         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2591                  current->pid, current->tgid, current_fsuid());
2592
2593         spin_lock_bh(&uid_tag_data_tree_lock);
2594
2595         /* Look for existing uid data, or alloc one. */
2596         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2597         if (IS_ERR_OR_NULL(utd_entry)) {
2598                 res = PTR_ERR(utd_entry);
2599                 goto err;
2600         }
2601
2602         /* Look for existing PID based proc_data */
2603         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2604                                               current->tgid);
2605         if (pqd_entry) {
2606                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2607                        "%s already opened\n",
2608                        current->pid, current->tgid, current_fsuid(),
2609                        QTU_DEV_NAME);
2610                 res = -EBUSY;
2611                 goto err_unlock_free_utd;
2612         }
2613
2614         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2615         if (!new_pqd_entry) {
2616                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2617                        "proc data alloc failed\n",
2618                        current->pid, current->tgid, current_fsuid());
2619                 res = -ENOMEM;
2620                 goto err_unlock_free_utd;
2621         }
2622         new_pqd_entry->pid = current->tgid;
2623         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2624         new_pqd_entry->parent_tag_data = utd_entry;
2625         utd_entry->num_pqd++;
2626
2627         proc_qtu_data_tree_insert(new_pqd_entry,
2628                                   &proc_qtu_data_tree);
2629
2630         spin_unlock_bh(&uid_tag_data_tree_lock);
2631         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2632                  current_fsuid(), new_pqd_entry);
2633         file->private_data = new_pqd_entry;
2634         return 0;
2635
2636 err_unlock_free_utd:
2637         if (!utd_entry_found) {
2638                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2639                 kfree(utd_entry);
2640         }
2641         spin_unlock_bh(&uid_tag_data_tree_lock);
2642 err:
2643         return res;
2644 }
2645
/*
 * release() handler for /dev/xt_qtaguid.
 * Untags every socket still listed in this process's proc_qtu_data and
 * then frees the tracking entry itself.
 */
static int qtudev_release(struct inode *inode, struct file *file)
{
	struct proc_qtu_data  *pqd_entry = file->private_data;
	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct list_head *entry, *next;
	struct tag_ref *tr;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	/*
	 * Do not trust the current->pid, it might just be a kworker cleaning
	 * up after a dead proc.
	 */
	DR_DEBUG("qtaguid: qtudev_release(): "
		 "pid=%u tgid=%u uid=%u "
		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
		 pqd_entry, pqd_entry->pid, utd_entry,
		 utd_entry->num_active_tags);

	/* Lock order: sock_tag_list_lock, then uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);

	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
		st_entry = list_entry(entry, struct sock_tag, list);
		DR_DEBUG("qtaguid: %s(): "
			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
			 __func__,
			 st_entry, st_entry->sk,
			 current->pid, current->tgid,
			 pqd_entry->parent_tag_data->uid);

		/*
		 * Re-resolve the uid data from the tag itself; it need not
		 * be the same uid as pqd_entry's parent.
		 */
		utd_entry = uid_tag_data_tree_search(
			&uid_tag_data_tree,
			get_uid_from_tag(st_entry->tag));
		BUG_ON(IS_ERR_OR_NULL(utd_entry));
		DR_DEBUG("qtaguid: %s(): "
			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
			 st_entry->tag, utd_entry);
		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
					 st_entry->tag);
		BUG_ON(!tr);
		BUG_ON(tr->num_sock_tags <= 0);
		tr->num_sock_tags--;
		free_tag_ref_from_utd_entry(tr, utd_entry);

		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		list_del(&st_entry->list);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);

		/*
		 * Try to free the utd_entry if no other proc_qtu_data is
		 * using it (num_pqd is 0) and it doesn't have active tags
		 * (num_active_tags is 0).
		 */
		put_utd_entry(utd_entry);
	}

	/* Drop this process's own tracking entry and its uid reference. */
	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
	pqd_entry->parent_tag_data->num_pqd--;
	put_utd_entry(pqd_entry->parent_tag_data);
	kfree(pqd_entry);
	file->private_data = NULL;

	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);


	/* Now that no locks are held, drop the deferred socket refs. */
	sock_tag_tree_erase(&st_to_free_tree);

	prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
			   current->pid, current->tgid);
	return 0;
}
2725
2726 /*------------------------------------------*/
/* Character-device ops for /dev/xt_qtaguid: only open/release matter. */
static const struct file_operations qtudev_fops = {
	.owner = THIS_MODULE,
	.open = qtudev_open,
	.release = qtudev_release,
};
2732
/* Misc device exposing /dev/xt_qtaguid with a dynamically assigned minor. */
static struct miscdevice qtu_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = QTU_DEV_NAME,
	.fops = &qtudev_fops,
	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
};
2739
2740 /*------------------------------------------*/
2741 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2742 {
2743         int ret;
2744         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2745         if (!*res_procdir) {
2746                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2747                 ret = -ENOMEM;
2748                 goto no_dir;
2749         }
2750
2751         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2752                                                 *res_procdir);
2753         if (!xt_qtaguid_ctrl_file) {
2754                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2755                         " file\n");
2756                 ret = -ENOMEM;
2757                 goto no_ctrl_entry;
2758         }
2759         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2760         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2761
2762         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2763                                                 *res_procdir);
2764         if (!xt_qtaguid_stats_file) {
2765                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2766                         "file\n");
2767                 ret = -ENOMEM;
2768                 goto no_stats_entry;
2769         }
2770         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2771         /*
2772          * TODO: add support counter hacking
2773          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2774          */
2775         return 0;
2776
2777 no_stats_entry:
2778         remove_proc_entry("ctrl", *res_procdir);
2779 no_ctrl_entry:
2780         remove_proc_entry("xt_qtaguid", NULL);
2781 no_dir:
2782         return ret;
2783 }
2784
/* xtables match registration; NFPROTO_UNSPEC covers both IPv4 and IPv6. */
static struct xt_match qtaguid_mt_reg __read_mostly = {
	/*
	 * This module masquerades as the "owner" module so that iptables
	 * tools can deal with it.
	 */
	.name       = "owner",
	.revision   = 1,
	.family     = NFPROTO_UNSPEC,
	.match      = qtaguid_mt,
	.matchsize  = sizeof(struct xt_qtaguid_match_info),
	.me         = THIS_MODULE,
};
2797
2798 static int __init qtaguid_mt_init(void)
2799 {
2800         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2801             || iface_stat_init(xt_qtaguid_procdir)
2802             || xt_register_match(&qtaguid_mt_reg)
2803             || misc_register(&qtu_device))
2804                 return -1;
2805         return 0;
2806 }
2807
/*
 * TODO: allow unloading of the module.
 * For now stats are permanent.
 * Kconfig forces 'y/n' and never an 'm'.
 */
2813
2814 module_init(qtaguid_mt_init);
2815 MODULE_AUTHOR("jpa <jpa@google.com>");
2816 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2817 MODULE_LICENSE("GPL");
2818 MODULE_ALIAS("ipt_owner");
2819 MODULE_ALIAS("ip6t_owner");
2820 MODULE_ALIAS("ipt_qtaguid");
2821 MODULE_ALIAS("ip6t_qtaguid");