netfilter: Build fixups - kuid/kguid changes & xt_socket_get/put_sk
[firefly-linux-kernel-4.4.55.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/ratelimit.h>
23 #include <linux/seq_file.h>
24 #include <linux/skbuff.h>
25 #include <linux/workqueue.h>
26 #include <net/addrconf.h>
27 #include <net/sock.h>
28 #include <net/tcp.h>
29 #include <net/udp.h>
30
31 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
32 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #endif
34
35 #include <linux/netfilter/xt_socket.h>
36 #include "xt_qtaguid_internal.h"
37 #include "xt_qtaguid_print.h"
38 #include "../../fs/proc/internal.h"
39
40 /*
41  * We only use the xt_socket funcs within a similar context to avoid unexpected
42  * return values.
43  */
44 #define XT_SOCKET_SUPPORTED_HOOKS \
45         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
46
47
48 static const char *module_procdirname = "xt_qtaguid";
49 static struct proc_dir_entry *xt_qtaguid_procdir;
50
51 static unsigned int proc_iface_perms = S_IRUGO;
52 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
53
54 static struct proc_dir_entry *xt_qtaguid_stats_file;
55 static unsigned int proc_stats_perms = S_IRUGO;
56 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
57
58 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
59
60 /* Everybody can write. But proc_ctrl_write_limited is true by default which
61  * limits what can be controlled. See the can_*() functions.
62  */
63 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
64 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
65
66 /* Limited by default, so the gid of the ctrl and stats proc entries
67  * will limit what can be done. See the can_*() functions.
68  */
69 static bool proc_stats_readall_limited = true;
70 static bool proc_ctrl_write_limited = true;
71
72 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
73                    S_IRUGO | S_IWUSR);
74 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
75                    S_IRUGO | S_IWUSR);
76
77 /*
78  * Limit the number of active tags (via socket tags) for a given UID.
79  * Multiple processes could share the UID.
80  */
81 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
82 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
83
84 /*
85  * After the kernel has initialized this module, it is still possible
86  * to make it passive.
87  * Setting passive to Y:
88  *  - the iface stats handling will not act on notifications.
89  *  - iptables matches will never match.
90  *  - ctrl commands silently succeed.
91  *  - stats are always empty.
92  * This is mostly useful when a bug is suspected.
93  */
94 static bool module_passive;
95 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
96
97 /*
98  * Control how qtaguid data is tracked per proc/uid.
99  * Setting tag_tracking_passive to Y:
100  *  - don't create proc specific structs to track tags
101  *  - don't check whether active tag stats exceed some limits.
102  *  - don't clean up socket tags on process exits.
103  * This is mostly useful when a bug is suspected.
104  */
105 static bool qtu_proc_handling_passive;
106 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
107                    S_IRUGO | S_IWUSR);
108
109 #define QTU_DEV_NAME "xt_qtaguid"
110
111 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
112 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
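/*
 * Note: each module_param*() knob above is also visible at runtime under
 * /sys/module/xt_qtaguid/parameters/<name> (e.g. debug_mask, passive),
 * with the permissions given as the last argument.
 */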
113
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname = "iface_stat";
116 static struct proc_dir_entry *iface_stat_procdir;
117 /*
118  * The iface_stat_all* will go away once userspace gets used to the new fields
119  * that have a format line.
120  */
121 static const char *iface_stat_all_procfilename = "iface_stat_all";
122 static struct proc_dir_entry *iface_stat_all_procfile;
123 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
124 static struct proc_dir_entry *iface_stat_fmt_procfile;
125
126
127 static LIST_HEAD(iface_stat_list);
128 static DEFINE_SPINLOCK(iface_stat_list_lock);
129
130 static struct rb_root sock_tag_tree = RB_ROOT;
131 static DEFINE_SPINLOCK(sock_tag_list_lock);
132
133 static struct rb_root tag_counter_set_tree = RB_ROOT;
134 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
135
136 static struct rb_root uid_tag_data_tree = RB_ROOT;
137 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
138
139 static struct rb_root proc_qtu_data_tree = RB_ROOT;
140 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
141
142 static struct qtaguid_event_counts qtu_events;
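/*
 * Rough map of the trackers declared above:
 *  - iface_stat_list: one iface_stat per interface, each holding its own
 *    tag_stat_tree of per-{acct_tag, uid_tag} counters
 *    (guarded by iface_stat_list_lock).
 *  - sock_tag_tree: struct sock * -> tag, for explicitly tagged sockets
 *    (guarded by sock_tag_list_lock).
 *  - tag_counter_set_tree: which counter set is currently active per
 *    uid tag (guarded by tag_counter_set_list_lock).
 *  - uid_tag_data_tree / proc_qtu_data_tree: per-UID and per-process
 *    bookkeeping, both guarded by uid_tag_data_tree_lock.
 */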
143 /*----------------------------------------------*/
144 static bool can_manipulate_uids(void)
145 {
146         /* root pwnd */
147         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
148                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
149                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
150 }
151
152 static bool can_impersonate_uid(kuid_t uid)
153 {
154         return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
155 }
156
157 static bool can_read_other_uid_stats(kuid_t uid)
158 {
159         /* root pwnd */
160         return in_egroup_p(xt_qtaguid_stats_file->gid)
161                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
162                 || unlikely(!proc_stats_readall_limited)
163                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
164 }
165
166 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
167                                   enum ifs_tx_rx direction,
168                                   enum ifs_proto ifs_proto,
169                                   int bytes,
170                                   int packets)
171 {
172         counters->bpc[set][direction][ifs_proto].bytes += bytes;
173         counters->bpc[set][direction][ifs_proto].packets += packets;
174 }
175
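/*
 * The tag_t values handled below are 64-bit. The helpers from
 * xt_qtaguid_internal.h pack the owning uid in the low 32 bits and the
 * accounting tag in the high 32 bits, hence get_uid_from_tag(),
 * get_atag_from_tag(), combine_atag_with_uid(), etc.
 */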
176 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
177 {
178         struct rb_node *node = root->rb_node;
179
180         while (node) {
181                 struct tag_node *data = rb_entry(node, struct tag_node, node);
182                 int result;
183                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
184                          " node=%p data=%p\n", tag, node, data);
185                 result = tag_compare(tag, data->tag);
186                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
187                          " data.tag=0x%llx (uid=%u) res=%d\n",
188                          tag, data->tag, get_uid_from_tag(data->tag), result);
189                 if (result < 0)
190                         node = node->rb_left;
191                 else if (result > 0)
192                         node = node->rb_right;
193                 else
194                         return data;
195         }
196         return NULL;
197 }
198
199 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
200 {
201         struct rb_node **new = &(root->rb_node), *parent = NULL;
202
203         /* Figure out where to put new node */
204         while (*new) {
205                 struct tag_node *this = rb_entry(*new, struct tag_node,
206                                                  node);
207                 int result = tag_compare(data->tag, this->tag);
208                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
209                          " (uid=%u)\n", __func__,
210                          this->tag,
211                          get_uid_from_tag(this->tag));
212                 parent = *new;
213                 if (result < 0)
214                         new = &((*new)->rb_left);
215                 else if (result > 0)
216                         new = &((*new)->rb_right);
217                 else
218                         BUG();
219         }
220
221         /* Add new node and rebalance tree. */
222         rb_link_node(&data->node, parent, new);
223         rb_insert_color(&data->node, root);
224 }
225
226 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
227 {
228         tag_node_tree_insert(&data->tn, root);
229 }
230
231 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
232 {
233         struct tag_node *node = tag_node_tree_search(root, tag);
234         if (!node)
235                 return NULL;
236         return rb_entry(&node->node, struct tag_stat, tn.node);
237 }
238
239 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
240                                         struct rb_root *root)
241 {
242         tag_node_tree_insert(&data->tn, root);
243 }
244
245 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
246                                                            tag_t tag)
247 {
248         struct tag_node *node = tag_node_tree_search(root, tag);
249         if (!node)
250                 return NULL;
251         return rb_entry(&node->node, struct tag_counter_set, tn.node);
252
253 }
254
255 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
256 {
257         tag_node_tree_insert(&data->tn, root);
258 }
259
260 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
261 {
262         struct tag_node *node = tag_node_tree_search(root, tag);
263         if (!node)
264                 return NULL;
265         return rb_entry(&node->node, struct tag_ref, tn.node);
266 }
267
268 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
269                                              const struct sock *sk)
270 {
271         struct rb_node *node = root->rb_node;
272
273         while (node) {
274                 struct sock_tag *data = rb_entry(node, struct sock_tag,
275                                                  sock_node);
276                 if (sk < data->sk)
277                         node = node->rb_left;
278                 else if (sk > data->sk)
279                         node = node->rb_right;
280                 else
281                         return data;
282         }
283         return NULL;
284 }
285
286 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
287 {
288         struct rb_node **new = &(root->rb_node), *parent = NULL;
289
290         /* Figure out where to put new node */
291         while (*new) {
292                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
293                                                  sock_node);
294                 parent = *new;
295                 if (data->sk < this->sk)
296                         new = &((*new)->rb_left);
297                 else if (data->sk > this->sk)
298                         new = &((*new)->rb_right);
299                 else
300                         BUG();
301         }
302
303         /* Add new node and rebalance tree. */
304         rb_link_node(&data->sock_node, parent, new);
305         rb_insert_color(&data->sock_node, root);
306 }
307
308 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
309 {
310         struct rb_node *node;
311         struct sock_tag *st_entry;
312
313         node = rb_first(st_to_free_tree);
314         while (node) {
315                 st_entry = rb_entry(node, struct sock_tag, sock_node);
316                 node = rb_next(node);
317                 CT_DEBUG("qtaguid: %s(): "
318                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
319                          st_entry->sk,
320                          st_entry->tag,
321                          get_uid_from_tag(st_entry->tag));
322                 rb_erase(&st_entry->sock_node, st_to_free_tree);
323                 sockfd_put(st_entry->socket);
324                 kfree(st_entry);
325         }
326 }
327
328 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
329                                                        const pid_t pid)
330 {
331         struct rb_node *node = root->rb_node;
332
333         while (node) {
334                 struct proc_qtu_data *data = rb_entry(node,
335                                                       struct proc_qtu_data,
336                                                       node);
337                 if (pid < data->pid)
338                         node = node->rb_left;
339                 else if (pid > data->pid)
340                         node = node->rb_right;
341                 else
342                         return data;
343         }
344         return NULL;
345 }
346
347 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
348                                       struct rb_root *root)
349 {
350         struct rb_node **new = &(root->rb_node), *parent = NULL;
351
352         /* Figure out where to put new node */
353         while (*new) {
354                 struct proc_qtu_data *this = rb_entry(*new,
355                                                       struct proc_qtu_data,
356                                                       node);
357                 parent = *new;
358                 if (data->pid < this->pid)
359                         new = &((*new)->rb_left);
360                 else if (data->pid > this->pid)
361                         new = &((*new)->rb_right);
362                 else
363                         BUG();
364         }
365
366         /* Add new node and rebalance tree. */
367         rb_link_node(&data->node, parent, new);
368         rb_insert_color(&data->node, root);
369 }
370
371 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
372                                      struct rb_root *root)
373 {
374         struct rb_node **new = &(root->rb_node), *parent = NULL;
375
376         /* Figure out where to put new node */
377         while (*new) {
378                 struct uid_tag_data *this = rb_entry(*new,
379                                                      struct uid_tag_data,
380                                                      node);
381                 parent = *new;
382                 if (data->uid < this->uid)
383                         new = &((*new)->rb_left);
384                 else if (data->uid > this->uid)
385                         new = &((*new)->rb_right);
386                 else
387                         BUG();
388         }
389
390         /* Add new node and rebalance tree. */
391         rb_link_node(&data->node, parent, new);
392         rb_insert_color(&data->node, root);
393 }
394
395 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
396                                                      uid_t uid)
397 {
398         struct rb_node *node = root->rb_node;
399
400         while (node) {
401                 struct uid_tag_data *data = rb_entry(node,
402                                                      struct uid_tag_data,
403                                                      node);
404                 if (uid < data->uid)
405                         node = node->rb_left;
406                 else if (uid > data->uid)
407                         node = node->rb_right;
408                 else
409                         return data;
410         }
411         return NULL;
412 }
413
414 /*
415  * Looks up the uid_tag_data for the given UID, allocating a new
416  * struct if needed; returns a pointer to the found or new entry.
417  * Returns an ERR_PTR on failure. No lock is taken here; callers are
418  * expected to hold uid_tag_data_tree_lock.
419  * If found_res is not NULL:
420  *   sets *found_res to true if the entry already existed, false if allocated.
421  */
422 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
423 {
424         struct uid_tag_data *utd_entry;
425
426         /* Look for top level uid_tag_data for the UID */
427         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
428         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
429
430         if (found_res)
431                 *found_res = utd_entry;
432         if (utd_entry)
433                 return utd_entry;
434
435         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
436         if (!utd_entry) {
437                 pr_err("qtaguid: get_uid_data(%u): "
438                        "tag data alloc failed\n", uid);
439                 return ERR_PTR(-ENOMEM);
440         }
441
442         utd_entry->uid = uid;
443         utd_entry->tag_ref_tree = RB_ROOT;
444         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
445         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
446         return utd_entry;
447 }
448
449 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
450 static struct tag_ref *new_tag_ref(tag_t new_tag,
451                                    struct uid_tag_data *utd_entry)
452 {
453         struct tag_ref *tr_entry;
454         int res;
455
456         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
457                 pr_info("qtaguid: new_tag_ref(0x%llx): "
458                         "tag ref alloc quota exceeded. max=%d\n",
459                         new_tag, max_sock_tags);
460                 res = -EMFILE;
461                 goto err_res;
462
463         }
464
465         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
466         if (!tr_entry) {
467                 pr_err("qtaguid: new_tag_ref(0x%llx): "
468                        "tag ref alloc failed\n",
469                        new_tag);
470                 res = -ENOMEM;
471                 goto err_res;
472         }
473         tr_entry->tn.tag = new_tag;
474         /* tr_entry->num_sock_tags  handled by caller */
475         utd_entry->num_active_tags++;
476         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
477         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
478                  " inserted new tag ref %p\n",
479                  new_tag, tr_entry);
480         return tr_entry;
481
482 err_res:
483         return ERR_PTR(res);
484 }
485
486 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
487                                       struct uid_tag_data **utd_res)
488 {
489         struct uid_tag_data *utd_entry;
490         struct tag_ref *tr_entry;
491         bool found_utd;
492         uid_t uid = get_uid_from_tag(full_tag);
493
494         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
495                  full_tag, uid);
496
497         utd_entry = get_uid_data(uid, &found_utd);
498         if (IS_ERR_OR_NULL(utd_entry)) {
499                 if (utd_res)
500                         *utd_res = utd_entry;
501                 return NULL;
502         }
503
504         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
505         if (utd_res)
506                 *utd_res = utd_entry;
507         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
508                  full_tag, utd_entry, tr_entry);
509         return tr_entry;
510 }
511
512 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
513 static struct tag_ref *get_tag_ref(tag_t full_tag,
514                                    struct uid_tag_data **utd_res)
515 {
516         struct uid_tag_data *utd_entry;
517         struct tag_ref *tr_entry;
518
519         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
520                  full_tag);
521         spin_lock_bh(&uid_tag_data_tree_lock);
522         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
523         BUG_ON(IS_ERR_OR_NULL(utd_entry));
524         if (!tr_entry)
525                 tr_entry = new_tag_ref(full_tag, utd_entry);
526
527         spin_unlock_bh(&uid_tag_data_tree_lock);
528         if (utd_res)
529                 *utd_res = utd_entry;
530         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
531                  full_tag, utd_entry, tr_entry);
532         return tr_entry;
533 }
534
535 /* Checks and maybe frees the UID Tag Data entry */
536 static void put_utd_entry(struct uid_tag_data *utd_entry)
537 {
538         /* Are we done with the UID tag data entry? */
539         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
540                 !utd_entry->num_pqd) {
541                 DR_DEBUG("qtaguid: %s(): "
542                          "erase utd_entry=%p uid=%u "
543                          "by pid=%u tgid=%u uid=%u\n", __func__,
544                          utd_entry, utd_entry->uid,
545                          current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
546                 BUG_ON(utd_entry->num_active_tags);
547                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
548                 kfree(utd_entry);
549         } else {
550                 DR_DEBUG("qtaguid: %s(): "
551                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
552                          __func__, utd_entry, utd_entry->num_active_tags,
553                          utd_entry->num_pqd);
554                 BUG_ON(!(utd_entry->num_active_tags ||
555                          utd_entry->num_pqd));
556         }
557 }
558
559 /*
560  * If no sock_tags are using this tag_ref, decrements the active tag
561  * count of the utd_entry, removes the tr_entry from
562  * utd_entry->tag_ref_tree and frees it.
563  */
564 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
565                                         struct uid_tag_data *utd_entry)
566 {
567         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
568                  tr_entry, tr_entry->tn.tag,
569                  get_uid_from_tag(tr_entry->tn.tag));
570         if (!tr_entry->num_sock_tags) {
571                 BUG_ON(!utd_entry->num_active_tags);
572                 utd_entry->num_active_tags--;
573                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
574                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
575                 kfree(tr_entry);
576         }
577 }
578
579 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
580 {
581         struct rb_node *node;
582         struct tag_ref *tr_entry;
583         tag_t acct_tag;
584
585         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
586                  full_tag, get_uid_from_tag(full_tag));
587         acct_tag = get_atag_from_tag(full_tag);
588         node = rb_first(&utd_entry->tag_ref_tree);
589         while (node) {
590                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
591                 node = rb_next(node);
592                 if (!acct_tag || tr_entry->tn.tag == full_tag)
593                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
594         }
595 }
596
597 static ssize_t read_proc_u64(struct file *file, char __user *buf,
598                          size_t size, loff_t *ppos)
599 {
600         uint64_t *valuep = PDE_DATA(file_inode(file));
601         char tmp[24];
602         size_t tmp_size;
603
604         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
605         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
606 }
607
608 static ssize_t read_proc_bool(struct file *file, char __user *buf,
609                           size_t size, loff_t *ppos)
610 {
611         bool *valuep = PDE_DATA(file_inode(file));
612         char tmp[24];
613         size_t tmp_size;
614
615         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
616         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
617 }
618
619 static int get_active_counter_set(tag_t tag)
620 {
621         int active_set = 0;
622         struct tag_counter_set *tcs;
623
624         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
625                  " (uid=%u)\n",
626                  tag, get_uid_from_tag(tag));
627         /* For now we only handle UID tags for active sets */
628         tag = get_utag_from_tag(tag);
629         spin_lock_bh(&tag_counter_set_list_lock);
630         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
631         if (tcs)
632                 active_set = tcs->active_set;
633         spin_unlock_bh(&tag_counter_set_list_lock);
634         return active_set;
635 }
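/*
 * Counter sets: set 0 is the default. By Android convention (not enforced
 * here) set 0 holds background and set 1 foreground traffic; userspace
 * switches the active set per uid tag through the ctrl proc interface
 * handled later in this file.
 */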
636
637 /*
638  * Find the entry for tracking the specified interface.
639  * Caller must hold iface_stat_list_lock
640  */
641 static struct iface_stat *get_iface_entry(const char *ifname)
642 {
643         struct iface_stat *iface_entry;
644
645         /* Can't track anything without a device name */
646         if (ifname == NULL) {
647                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
648                 return NULL;
649         }
650
651         /* Iterate over interfaces */
652         list_for_each_entry(iface_entry, &iface_stat_list, list) {
653                 if (!strcmp(ifname, iface_entry->ifname))
654                         goto done;
655         }
656         iface_entry = NULL;
657 done:
658         return iface_entry;
659 }
660
661 /* This is for fmt2 only */
662 static void pp_iface_stat_header(struct seq_file *m)
663 {
664         seq_puts(m,
665                  "ifname "
666                  "total_skb_rx_bytes total_skb_rx_packets "
667                  "total_skb_tx_bytes total_skb_tx_packets "
668                  "rx_tcp_bytes rx_tcp_packets "
669                  "rx_udp_bytes rx_udp_packets "
670                  "rx_other_bytes rx_other_packets "
671                  "tx_tcp_bytes tx_tcp_packets "
672                  "tx_udp_bytes tx_udp_packets "
673                  "tx_other_bytes tx_other_packets\n"
674         );
675 }
676
677 static void pp_iface_stat_line(struct seq_file *m,
678                                struct iface_stat *iface_entry)
679 {
680         struct data_counters *cnts;
681         int cnt_set = 0;   /* We only use one set for the device */
682         cnts = &iface_entry->totals_via_skb;
683         seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
684                    "%llu %llu %llu %llu %llu %llu %llu %llu\n",
685                    iface_entry->ifname,
686                    dc_sum_bytes(cnts, cnt_set, IFS_RX),
687                    dc_sum_packets(cnts, cnt_set, IFS_RX),
688                    dc_sum_bytes(cnts, cnt_set, IFS_TX),
689                    dc_sum_packets(cnts, cnt_set, IFS_TX),
690                    cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
691                    cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
692                    cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
693                    cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
694                    cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
695                    cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
696                    cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
697                    cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
698                    cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
699                    cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
700                    cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
701                    cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
702 }
703
704 struct proc_iface_stat_fmt_info {
705         int fmt;
706 };
707
708 static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
709 {
710         struct proc_iface_stat_fmt_info *p = m->private;
711         loff_t n = *pos;
712
713         /*
714          * This lock will prevent iface_stat_update() from changing active,
715          * and in turn prevent an interface from unregistering itself.
716          */
717         spin_lock_bh(&iface_stat_list_lock);
718
719         if (unlikely(module_passive))
720                 return NULL;
721
722         if (!n && p->fmt == 2)
723                 pp_iface_stat_header(m);
724
725         return seq_list_start(&iface_stat_list, n);
726 }
727
728 static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
729 {
730         return seq_list_next(p, &iface_stat_list, pos);
731 }
732
733 static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
734 {
735         spin_unlock_bh(&iface_stat_list_lock);
736 }
737
738 static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
739 {
740         struct proc_iface_stat_fmt_info *p = m->private;
741         struct iface_stat *iface_entry;
742         struct rtnl_link_stats64 dev_stats, *stats;
743         struct rtnl_link_stats64 no_dev_stats = {0};
744
745
746         CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
747                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
748
749         iface_entry = list_entry(v, struct iface_stat, list);
750
751         if (iface_entry->active) {
752                 stats = dev_get_stats(iface_entry->net_dev,
753                                       &dev_stats);
754         } else {
755                 stats = &no_dev_stats;
756         }
757         /*
758          * If the meaning of the data changes, then update the fmtX
759          * string.
760          */
761         if (p->fmt == 1) {
762                 seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
763                            iface_entry->ifname,
764                            iface_entry->active,
765                            iface_entry->totals_via_dev[IFS_RX].bytes,
766                            iface_entry->totals_via_dev[IFS_RX].packets,
767                            iface_entry->totals_via_dev[IFS_TX].bytes,
768                            iface_entry->totals_via_dev[IFS_TX].packets,
769                            stats->rx_bytes, stats->rx_packets,
770                            stats->tx_bytes, stats->tx_packets
771                            );
772         } else {
773                 pp_iface_stat_line(m, iface_entry);
774         }
775         return 0;
776 }
777
778 static const struct file_operations read_u64_fops = {
779         .read           = read_proc_u64,
780         .llseek         = default_llseek,
781 };
782
783 static const struct file_operations read_bool_fops = {
784         .read           = read_proc_bool,
785         .llseek         = default_llseek,
786 };
787
788 static void iface_create_proc_worker(struct work_struct *work)
789 {
790         struct proc_dir_entry *proc_entry;
791         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
792                                                    iface_work);
793         struct iface_stat *new_iface  = isw->iface_entry;
794
795         /* iface_entries are not deleted, so safe to manipulate. */
796         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
797         if (IS_ERR_OR_NULL(proc_entry)) {
798                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
799                 kfree(isw);
800                 return;
801         }
802
803         new_iface->proc_ptr = proc_entry;
804
805         proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
806                          &read_u64_fops,
807                          &new_iface->totals_via_dev[IFS_TX].bytes);
808         proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
809                          &read_u64_fops,
810                          &new_iface->totals_via_dev[IFS_RX].bytes);
811         proc_create_data("tx_packets", proc_iface_perms, proc_entry,
812                          &read_u64_fops,
813                          &new_iface->totals_via_dev[IFS_TX].packets);
814         proc_create_data("rx_packets", proc_iface_perms, proc_entry,
815                          &read_u64_fops,
816                          &new_iface->totals_via_dev[IFS_RX].packets);
817         proc_create_data("active", proc_iface_perms, proc_entry,
818                          &read_bool_fops, &new_iface->active);
819
820         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
821                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
822         kfree(isw);
823 }
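/*
 * Assuming the usual registration of module_procdirname under /proc/net
 * (done further down in this file), the worker above ends up creating,
 * per interface:
 *   /proc/net/xt_qtaguid/iface_stat/<ifname>/{rx,tx}_{bytes,packets}
 *   /proc/net/xt_qtaguid/iface_stat/<ifname>/active
 */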
824
825 /*
826  * Will set the entry's active state, and
827  * update the net_dev pointer accordingly.
828  */
829 static void _iface_stat_set_active(struct iface_stat *entry,
830                                    struct net_device *net_dev,
831                                    bool activate)
832 {
833         if (activate) {
834                 entry->net_dev = net_dev;
835                 entry->active = true;
836                 IF_DEBUG("qtaguid: %s(%s): "
837                          "enable tracking. rfcnt=%d\n", __func__,
838                          entry->ifname,
839                          __this_cpu_read(*net_dev->pcpu_refcnt));
840         } else {
841                 entry->active = false;
842                 entry->net_dev = NULL;
843                 IF_DEBUG("qtaguid: %s(%s): "
844                          "disable tracking. rfcnt=%d\n", __func__,
845                          entry->ifname,
846                          __this_cpu_read(*net_dev->pcpu_refcnt));
847
848         }
849 }
850
851 /* Caller must hold iface_stat_list_lock */
852 static struct iface_stat *iface_alloc(struct net_device *net_dev)
853 {
854         struct iface_stat *new_iface;
855         struct iface_stat_work *isw;
856
857         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
858         if (new_iface == NULL) {
859                 pr_err("qtaguid: iface_stat: create(%s): "
860                        "iface_stat alloc failed\n", net_dev->name);
861                 return NULL;
862         }
863         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
864         if (new_iface->ifname == NULL) {
865                 pr_err("qtaguid: iface_stat: create(%s): "
866                        "ifname alloc failed\n", net_dev->name);
867                 kfree(new_iface);
868                 return NULL;
869         }
870         spin_lock_init(&new_iface->tag_stat_list_lock);
871         new_iface->tag_stat_tree = RB_ROOT;
872         _iface_stat_set_active(new_iface, net_dev, true);
873
874         /*
875          * ipv6 notifier chains are atomic :(
876          * No create_proc_read_entry() for you!
877          */
878         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
879         if (!isw) {
880                 pr_err("qtaguid: iface_stat: create(%s): "
881                        "work alloc failed\n", new_iface->ifname);
882                 _iface_stat_set_active(new_iface, net_dev, false);
883                 kfree(new_iface->ifname);
884                 kfree(new_iface);
885                 return NULL;
886         }
887         isw->iface_entry = new_iface;
888         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
889         schedule_work(&isw->iface_work);
890         list_add(&new_iface->list, &iface_stat_list);
891         return new_iface;
892 }
893
894 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
895                                                struct iface_stat *iface)
896 {
897         struct rtnl_link_stats64 dev_stats, *stats;
898         bool stats_rewound;
899
900         stats = dev_get_stats(net_dev, &dev_stats);
901         /* No empty packets */
902         stats_rewound =
903                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
904                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
905
906         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
907                  "bytes rx/tx=%llu/%llu "
908                  "active=%d last_known=%d "
909                  "stats_rewound=%d\n", __func__,
910                  net_dev ? net_dev->name : "?",
911                  iface, net_dev,
912                  stats->rx_bytes, stats->tx_bytes,
913                  iface->active, iface->last_known_valid, stats_rewound);
914
915         if (iface->active && iface->last_known_valid && stats_rewound) {
916                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
917                              "iface reset its stats unexpectedly\n", __func__,
918                              net_dev->name);
919
920                 iface->totals_via_dev[IFS_TX].bytes +=
921                         iface->last_known[IFS_TX].bytes;
922                 iface->totals_via_dev[IFS_TX].packets +=
923                         iface->last_known[IFS_TX].packets;
924                 iface->totals_via_dev[IFS_RX].bytes +=
925                         iface->last_known[IFS_RX].bytes;
926                 iface->totals_via_dev[IFS_RX].packets +=
927                         iface->last_known[IFS_RX].packets;
928                 iface->last_known_valid = false;
929                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
930                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
931                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
932                          iface->last_known[IFS_TX].bytes);
933         }
934 }
935
936 /*
937  * Create a new entry for tracking the specified interface.
938  * Do nothing if the entry already exists.
939  * Called when an interface is configured with a valid IP address.
940  */
941 static void iface_stat_create(struct net_device *net_dev,
942                               struct in_ifaddr *ifa)
943 {
944         struct in_device *in_dev = NULL;
945         const char *ifname;
946         struct iface_stat *entry;
947         __be32 ipaddr = 0;
948         struct iface_stat *new_iface;
949
950         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
951                  net_dev ? net_dev->name : "?",
952                  ifa, net_dev);
953         if (!net_dev) {
954                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
955                 return;
956         }
957
958         ifname = net_dev->name;
959         if (!ifa) {
960                 in_dev = in_dev_get(net_dev);
961                 if (!in_dev) {
962                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
963                                ifname);
964                         return;
965                 }
966                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
967                          ifname, in_dev);
968                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
969                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
970                                  "ifa=%p ifa_label=%s\n",
971                                  ifname, ifa,
972                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
973                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
974                                 break;
975                 }
976         }
977
978         if (!ifa) {
979                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
980                          ifname);
981                 goto done_put;
982         }
983         ipaddr = ifa->ifa_local;
984
985         spin_lock_bh(&iface_stat_list_lock);
986         entry = get_iface_entry(ifname);
987         if (entry != NULL) {
988                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
989                          ifname, entry);
990                 iface_check_stats_reset_and_adjust(net_dev, entry);
991                 _iface_stat_set_active(entry, net_dev, true);
992                 IF_DEBUG("qtaguid: %s(%s): "
993                          "tracking now %d on ip=%pI4\n", __func__,
994                          entry->ifname, true, &ipaddr);
995                 goto done_unlock_put;
996         }
997
998         new_iface = iface_alloc(net_dev);
999         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1000                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1001 done_unlock_put:
1002         spin_unlock_bh(&iface_stat_list_lock);
1003 done_put:
1004         if (in_dev)
1005                 in_dev_put(in_dev);
1006 }
1007
1008 static void iface_stat_create_ipv6(struct net_device *net_dev,
1009                                    struct inet6_ifaddr *ifa)
1010 {
1011         struct in_device *in_dev;
1012         const char *ifname;
1013         struct iface_stat *entry;
1014         struct iface_stat *new_iface;
1015         int addr_type;
1016
1017         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1018                  ifa, net_dev, net_dev ? net_dev->name : "");
1019         if (!net_dev) {
1020                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1021                 return;
1022         }
1023         ifname = net_dev->name;
1024
1025         in_dev = in_dev_get(net_dev);
1026         if (!in_dev) {
1027                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1028                        ifname);
1029                 return;
1030         }
1031
1032         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1033                  ifname, in_dev);
1034
1035         if (!ifa) {
1036                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1037                          ifname);
1038                 goto done_put;
1039         }
1040         addr_type = ipv6_addr_type(&ifa->addr);
1041
1042         spin_lock_bh(&iface_stat_list_lock);
1043         entry = get_iface_entry(ifname);
1044         if (entry != NULL) {
1045                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1046                          ifname, entry);
1047                 iface_check_stats_reset_and_adjust(net_dev, entry);
1048                 _iface_stat_set_active(entry, net_dev, true);
1049                 IF_DEBUG("qtaguid: %s(%s): "
1050                          "tracking now %d on ip=%pI6c\n", __func__,
1051                          entry->ifname, true, &ifa->addr);
1052                 goto done_unlock_put;
1053         }
1054
1055         new_iface = iface_alloc(net_dev);
1056         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1057                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1058
1059 done_unlock_put:
1060         spin_unlock_bh(&iface_stat_list_lock);
1061 done_put:
1062         in_dev_put(in_dev);
1063 }
1064
1065 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1066 {
1067         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1068         return sock_tag_tree_search(&sock_tag_tree, sk);
1069 }
1070
1071 static struct sock_tag *get_sock_stat(const struct sock *sk)
1072 {
1073         struct sock_tag *sock_tag_entry;
1074         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1075         if (!sk)
1076                 return NULL;
1077         spin_lock_bh(&sock_tag_list_lock);
1078         sock_tag_entry = get_sock_stat_nl(sk);
1079         spin_unlock_bh(&sock_tag_list_lock);
1080         return sock_tag_entry;
1081 }
1082
1083 static int ipx_proto(const struct sk_buff *skb,
1084                      struct xt_action_param *par)
1085 {
1086         int thoff = 0, tproto;
1087
1088         switch (par->family) {
1089         case NFPROTO_IPV6:
1090                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1091                 if (tproto < 0)
1092                         MT_DEBUG("%s(): transport header not found in ipv6"
1093                                  " skb=%p\n", __func__, skb);
1094                 break;
1095         case NFPROTO_IPV4:
1096                 tproto = ip_hdr(skb)->protocol;
1097                 break;
1098         default:
1099                 tproto = IPPROTO_RAW;
1100         }
1101         return tproto;
1102 }
1103
1104 static void
1105 data_counters_update(struct data_counters *dc, int set,
1106                      enum ifs_tx_rx direction, int proto, int bytes)
1107 {
1108         switch (proto) {
1109         case IPPROTO_TCP:
1110                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1111                 break;
1112         case IPPROTO_UDP:
1113                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1114                 break;
1115         case IPPROTO_IP:
1116         default:
1117                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1118                                     1);
1119                 break;
1120         }
1121 }
1122
1123 /*
1124  * Update stats for the specified interface. Do nothing if the entry
1125  * does not exist (when a device was never configured with an IP address).
1126  * Called when a device is being unregistered.
1127  */
1128 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1129 {
1130         struct rtnl_link_stats64 dev_stats, *stats;
1131         struct iface_stat *entry;
1132
1133         stats = dev_get_stats(net_dev, &dev_stats);
1134         spin_lock_bh(&iface_stat_list_lock);
1135         entry = get_iface_entry(net_dev->name);
1136         if (entry == NULL) {
1137                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1138                          net_dev->name);
1139                 spin_unlock_bh(&iface_stat_list_lock);
1140                 return;
1141         }
1142
1143         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1144                  net_dev->name, entry);
1145         if (!entry->active) {
1146                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1147                          net_dev->name);
1148                 spin_unlock_bh(&iface_stat_list_lock);
1149                 return;
1150         }
1151
1152         if (stash_only) {
1153                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1154                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1155                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1156                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1157                 entry->last_known_valid = true;
1158                 IF_DEBUG("qtaguid: %s(%s): "
1159                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1160                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1161                 spin_unlock_bh(&iface_stat_list_lock);
1162                 return;
1163         }
1164         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1165         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1166         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1167         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1168         /* We don't need the last_known[] anymore */
1169         entry->last_known_valid = false;
1170         _iface_stat_set_active(entry, net_dev, false);
1171         IF_DEBUG("qtaguid: %s(%s): "
1172                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1173                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1174         spin_unlock_bh(&iface_stat_list_lock);
1175 }
1176
1177 /*
1178  * Update stats for the specified interface from the skb.
1179  * Do nothing if the entry does not exist
1180  * (when a device was never configured with an IP address).
1181  * Called on each skb.
1182  */
1183 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1184                                        struct xt_action_param *par)
1185 {
1186         struct iface_stat *entry;
1187         const struct net_device *el_dev;
1188         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1189         int bytes = skb->len;
1190         int proto;
1191
1192         if (!skb->dev) {
1193                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1194                 el_dev = par->in ? : par->out;
1195         } else {
1196                 const struct net_device *other_dev;
1197                 el_dev = skb->dev;
1198                 other_dev = par->in ? : par->out;
1199                 if (el_dev != other_dev) {
1200                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1201                                  "par->(in/out)=%p %s\n",
1202                                  par->hooknum, el_dev, el_dev->name, other_dev,
1203                                  other_dev->name);
1204                 }
1205         }
1206
1207         if (unlikely(!el_dev)) {
1208                 pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
1209                                    par->hooknum, __func__);
1210                 BUG();
1211         } else if (unlikely(!el_dev->name)) {
1212                 pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
1213                                    par->hooknum, __func__);
1214                 BUG();
1215         } else {
1216                 proto = ipx_proto(skb, par);
1217                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1218                          par->hooknum, el_dev->name, el_dev->type,
1219                          par->family, proto);
1220         }
1221
1222         spin_lock_bh(&iface_stat_list_lock);
1223         entry = get_iface_entry(el_dev->name);
1224         if (entry == NULL) {
1225                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1226                          __func__, el_dev->name);
1227                 spin_unlock_bh(&iface_stat_list_lock);
1228                 return;
1229         }
1230
1231         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1232                  el_dev->name, entry);
1233
1234         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1235                              bytes);
1236         spin_unlock_bh(&iface_stat_list_lock);
1237 }
1238
1239 static void tag_stat_update(struct tag_stat *tag_entry,
1240                         enum ifs_tx_rx direction, int proto, int bytes)
1241 {
1242         int active_set;
1243         active_set = get_active_counter_set(tag_entry->tn.tag);
1244         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1245                  "dir=%d proto=%d bytes=%d)\n",
1246                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1247                  active_set, direction, proto, bytes);
1248         data_counters_update(&tag_entry->counters, active_set, direction,
1249                              proto, bytes);
1250         if (tag_entry->parent_counters)
1251                 data_counters_update(tag_entry->parent_counters, active_set,
1252                                      direction, proto, bytes);
1253 }
1254
1255 /*
1256  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1257  * the interface.
1258  * iface_entry->tag_stat_list_lock should be held.
1259  */
1260 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1261                                            tag_t tag)
1262 {
1263         struct tag_stat *new_tag_stat_entry = NULL;
1264         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1265                  " (uid=%u)\n", __func__,
1266                  iface_entry, tag, get_uid_from_tag(tag));
1267         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1268         if (!new_tag_stat_entry) {
1269                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1270                 goto done;
1271         }
1272         new_tag_stat_entry->tn.tag = tag;
1273         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1274 done:
1275         return new_tag_stat_entry;
1276 }
1277
1278 static void if_tag_stat_update(const char *ifname, uid_t uid,
1279                                const struct sock *sk, enum ifs_tx_rx direction,
1280                                int proto, int bytes)
1281 {
1282         struct tag_stat *tag_stat_entry;
1283         tag_t tag, acct_tag;
1284         tag_t uid_tag;
1285         struct data_counters *uid_tag_counters;
1286         struct sock_tag *sock_tag_entry;
1287         struct iface_stat *iface_entry;
1288         struct tag_stat *new_tag_stat = NULL;
1289         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1290                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1291                  ifname, uid, sk, direction, proto, bytes);
1292
1293
1294         iface_entry = get_iface_entry(ifname);
1295         if (!iface_entry) {
1296                 pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
1297                                    "%s not found\n", ifname);
1298                 return;
1299         }
1300         /* It is ok to process data when an iface_entry is inactive */
1301
1302         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1303                  ifname, iface_entry);
1304
1305         /*
1306          * Look for a tagged sock.
1307          * It will have an acct_tag combined with the owning uid.
1308          */
1309         sock_tag_entry = get_sock_stat(sk);
1310         if (sock_tag_entry) {
1311                 tag = sock_tag_entry->tag;
1312                 acct_tag = get_atag_from_tag(tag);
1313                 uid_tag = get_utag_from_tag(tag);
1314         } else {
1315                 acct_tag = make_atag_from_value(0);
1316                 tag = combine_atag_with_uid(acct_tag, uid);
1317                 uid_tag = make_tag_from_uid(uid);
1318         }
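        /*
         * Example, assuming the tag layout from xt_qtaguid_internal.h:
         * a socket tagged with acct_tag value 0x12 by uid 1000 yields
         * tag = (0x12 << 32) | 1000 and uid_tag = 1000; an untagged
         * socket has acct_tag = 0, so tag == uid_tag == make_tag_from_uid(uid).
         */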
1319         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1320                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1321                  tag, get_uid_from_tag(tag), iface_entry);
1322         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1323         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1324
1325         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1326                                               tag);
1327         if (tag_stat_entry) {
1328                 /*
1329                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1330                  * {0, uid_tag} will also get updated.
1331                  */
1332                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1333                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1334                 return;
1335         }
1336
1337         /* Loop over tag list under this interface for {0,uid_tag} */
1338         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1339                                               uid_tag);
1340         if (!tag_stat_entry) {
1341                 /* Here: the base uid_tag did not exist */
1342                 /*
1343                  * No parent counters. So
1344                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1345                  *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1346                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1347                 if (!new_tag_stat)
1348                         goto unlock;
1349                 uid_tag_counters = &new_tag_stat->counters;
1350         } else {
1351                 uid_tag_counters = &tag_stat_entry->counters;
1352         }
1353
1354         if (acct_tag) {
1355                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1356                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1357                 if (!new_tag_stat)
1358                         goto unlock;
1359                 new_tag_stat->parent_counters = uid_tag_counters;
1360         } else {
1361                 /*
1362                  * For new_tag_stat to be still NULL here would require:
1363                  *  {0, uid_tag} exists
1364                  *  and {acct_tag, uid_tag} doesn't exist
1365                  *  AND acct_tag == 0.
1366                  * Impossible. This reassures us that new_tag_stat
1367                  * below will always be assigned.
1368                  */
1369                 BUG_ON(!new_tag_stat);
1370         }
1371         tag_stat_update(new_tag_stat, direction, proto, bytes);
1372 unlock:
1373         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1374 }
1375
1376 static int iface_netdev_event_handler(struct notifier_block *nb,
1377                                       unsigned long event, void *ptr) {
1378         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1379
1380         if (unlikely(module_passive))
1381                 return NOTIFY_DONE;
1382
1383         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1384                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1385                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1386
1387         switch (event) {
1388         case NETDEV_UP:
1389                 iface_stat_create(dev, NULL);
1390                 atomic64_inc(&qtu_events.iface_events);
1391                 break;
1392         case NETDEV_DOWN:
1393         case NETDEV_UNREGISTER:
1394                 iface_stat_update(dev, event == NETDEV_DOWN);
1395                 atomic64_inc(&qtu_events.iface_events);
1396                 break;
1397         }
1398         return NOTIFY_DONE;
1399 }
1400
1401 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1402                                          unsigned long event, void *ptr)
1403 {
1404         struct inet6_ifaddr *ifa = ptr;
1405         struct net_device *dev;
1406
1407         if (unlikely(module_passive))
1408                 return NOTIFY_DONE;
1409
1410         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1411                  "ev=0x%lx/%s ifa=%p\n",
1412                  event, netdev_evt_str(event), ifa);
1413
1414         switch (event) {
1415         case NETDEV_UP:
1416                 BUG_ON(!ifa || !ifa->idev);
1417                 dev = ifa->idev->dev;
1418                 iface_stat_create_ipv6(dev, ifa);
1419                 atomic64_inc(&qtu_events.iface_events);
1420                 break;
1421         case NETDEV_DOWN:
1422         case NETDEV_UNREGISTER:
1423                 BUG_ON(!ifa || !ifa->idev);
1424                 dev = ifa->idev->dev;
1425                 iface_stat_update(dev, event == NETDEV_DOWN);
1426                 atomic64_inc(&qtu_events.iface_events);
1427                 break;
1428         }
1429         return NOTIFY_DONE;
1430 }
1431
1432 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1433                                         unsigned long event, void *ptr)
1434 {
1435         struct in_ifaddr *ifa = ptr;
1436         struct net_device *dev;
1437
1438         if (unlikely(module_passive))
1439                 return NOTIFY_DONE;
1440
1441         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1442                  "ev=0x%lx/%s ifa=%p\n",
1443                  event, netdev_evt_str(event), ifa);
1444
1445         switch (event) {
1446         case NETDEV_UP:
1447                 BUG_ON(!ifa || !ifa->ifa_dev);
1448                 dev = ifa->ifa_dev->dev;
1449                 iface_stat_create(dev, ifa);
1450                 atomic64_inc(&qtu_events.iface_events);
1451                 break;
1452         case NETDEV_DOWN:
1453         case NETDEV_UNREGISTER:
1454                 BUG_ON(!ifa || !ifa->ifa_dev);
1455                 dev = ifa->ifa_dev->dev;
1456                 iface_stat_update(dev, event == NETDEV_DOWN);
1457                 atomic64_inc(&qtu_events.iface_events);
1458                 break;
1459         }
1460         return NOTIFY_DONE;
1461 }
1462
1463 static struct notifier_block iface_netdev_notifier_blk = {
1464         .notifier_call = iface_netdev_event_handler,
1465 };
1466
1467 static struct notifier_block iface_inetaddr_notifier_blk = {
1468         .notifier_call = iface_inetaddr_event_handler,
1469 };
1470
1471 static struct notifier_block iface_inet6addr_notifier_blk = {
1472         .notifier_call = iface_inet6addr_event_handler,
1473 };
1474
1475 static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
1476         .start  = iface_stat_fmt_proc_start,
1477         .next   = iface_stat_fmt_proc_next,
1478         .stop   = iface_stat_fmt_proc_stop,
1479         .show   = iface_stat_fmt_proc_show,
1480 };
1481
1482 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1483 {
1484         struct proc_iface_stat_fmt_info *s;
1485
1486         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1487                         sizeof(struct proc_iface_stat_fmt_info));
1488         if (!s)
1489                 return -ENOMEM;
1490
1491         s->fmt = (uintptr_t)PDE_DATA(inode);
1492         return 0;
1493 }
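/*
 * Note (illustrative): both proc entries created in iface_stat_init() below
 * share proc_iface_stat_fmt_fops; they differ only in the PDE data pointer
 * ((void *)1 vs (void *)2), which proc_iface_stat_fmt_open() stashes in
 * s->fmt so the seq_file ->show callback can pick the old or the new output
 * format.
 */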
1494
1495 static const struct file_operations proc_iface_stat_fmt_fops = {
1496         .open           = proc_iface_stat_fmt_open,
1497         .read           = seq_read,
1498         .llseek         = seq_lseek,
1499         .release        = seq_release_private,
1500 };
1501
1502 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1503 {
1504         int err;
1505
1506         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1507         if (!iface_stat_procdir) {
1508                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1509                 err = -1;
1510                 goto err;
1511         }
1512
1513         iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1514                                                    proc_iface_perms,
1515                                                    parent_procdir,
1516                                                    &proc_iface_stat_fmt_fops,
1517                                                    (void *)1 /* fmt1 */);
1518         if (!iface_stat_all_procfile) {
1519                 pr_err("qtaguid: iface_stat: init "
1520                        "failed to create iface_stat_all proc entry\n");
1521                 err = -1;
1522                 goto err_zap_entry;
1523         }
1524
1525         iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1526                                                    proc_iface_perms,
1527                                                    parent_procdir,
1528                                                    &proc_iface_stat_fmt_fops,
1529                                                    (void *)2 /* fmt2 */);
1530         if (!iface_stat_fmt_procfile) {
1531                 pr_err("qtaguid: iface_stat: init "
1532                        "failed to create iface_stat_fmt proc entry\n");
1533                 err = -1;
1534                 goto err_zap_all_stats_entry;
1535         }
1536
1537
1538         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1539         if (err) {
1540                 pr_err("qtaguid: iface_stat: init "
1541                        "failed to register dev event handler\n");
1542                 goto err_zap_all_stats_entries;
1543         }
1544         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1545         if (err) {
1546                 pr_err("qtaguid: iface_stat: init "
1547                        "failed to register ipv4 dev event handler\n");
1548                 goto err_unreg_nd;
1549         }
1550
1551         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1552         if (err) {
1553                 pr_err("qtaguid: iface_stat: init "
1554                        "failed to register ipv6 dev event handler\n");
1555                 goto err_unreg_ip4_addr;
1556         }
1557         return 0;
1558
1559 err_unreg_ip4_addr:
1560         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1561 err_unreg_nd:
1562         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1563 err_zap_all_stats_entries:
1564         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1565 err_zap_all_stats_entry:
1566         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1567 err_zap_entry:
1568         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1569 err:
1570         return err;
1571 }
1572
1573 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1574                                     struct xt_action_param *par)
1575 {
1576         struct sock *sk;
1577         unsigned int hook_mask = (1 << par->hooknum);
1578
1579         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1580                  par->hooknum, par->family);
1581
1582         /*
1583          * Let's not abuse the xt_socket_get*_sk(), or else it will
1584          * return garbage SKs.
1585          */
1586         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1587                 return NULL;
1588
1589         switch (par->family) {
1590         case NFPROTO_IPV6:
1591                 sk = xt_socket_get6_sk(skb, par);
1592                 break;
1593         case NFPROTO_IPV4:
1594                 sk = xt_socket_get4_sk(skb, par);
1595                 break;
1596         default:
1597                 return NULL;
1598         }
1599
1600         if (sk) {
1601                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1602                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1603                 /*
1604                  * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1605                  * "struct inet_timewait_sock" which is missing fields.
1606                  */
1607                 if (sk->sk_state == TCP_TIME_WAIT) {
1608                         xt_socket_put_sk(sk);
1609                         sk = NULL;
1610                 }
1611         }
1612         return sk;
1613 }
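/*
 * Illustrative usage sketch (an assumption mirroring qtaguid_mt() below, not
 * a new code path): a socket returned by qtaguid_find_sk() carries a
 * reference taken by xt_socket_get4_sk()/xt_socket_get6_sk() and must be
 * released once the caller is done with it:
 *
 *   sk = qtaguid_find_sk(skb, par);
 *   if (sk) {
 *           ... account against sk ...
 *           xt_socket_put_sk(sk);
 *   }
 *
 * A socket taken directly from skb->sk is not ref'd here and must not be put.
 */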
1614
1615 static void account_for_uid(const struct sk_buff *skb,
1616                             const struct sock *alternate_sk, uid_t uid,
1617                             struct xt_action_param *par)
1618 {
1619         const struct net_device *el_dev;
1620
1621         if (!skb->dev) {
1622                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1623                 el_dev = par->in ? : par->out;
1624         } else {
1625                 const struct net_device *other_dev;
1626                 el_dev = skb->dev;
1627                 other_dev = par->in ? : par->out;
1628                 if (el_dev != other_dev) {
1629                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1630                                 "par->(in/out)=%p %s\n",
1631                                 par->hooknum, el_dev, el_dev->name, other_dev,
1632                                 other_dev->name);
1633                 }
1634         }
1635
1636         if (unlikely(!el_dev)) {
1637                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1638         } else if (unlikely(!el_dev->name)) {
1639                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1640         } else {
1641                 int proto = ipx_proto(skb, par);
1642                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1643                          par->hooknum, el_dev->name, el_dev->type,
1644                          par->family, proto);
1645
1646                 if_tag_stat_update(el_dev->name, uid,
1647                                 skb->sk ? skb->sk : alternate_sk,
1648                                 par->in ? IFS_RX : IFS_TX,
1649                                 proto, skb->len);
1650         }
1651 }
1652
1653 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1654 {
1655         const struct xt_qtaguid_match_info *info = par->matchinfo;
1656         const struct file *filp;
1657         bool got_sock = false;
1658         struct sock *sk;
1659         kuid_t sock_uid;
1660         bool res;
1661
1662         if (unlikely(module_passive))
1663                 return (info->match ^ info->invert) == 0;
1664
1665         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1666                  par->hooknum, skb, par->in, par->out, par->family);
1667
1668         atomic64_inc(&qtu_events.match_calls);
1669         if (skb == NULL) {
1670                 res = (info->match ^ info->invert) == 0;
1671                 goto ret_res;
1672         }
1673
1674         switch (par->hooknum) {
1675         case NF_INET_PRE_ROUTING:
1676         case NF_INET_POST_ROUTING:
1677                 atomic64_inc(&qtu_events.match_calls_prepost);
1678                 iface_stat_update_from_skb(skb, par);
1679                 /*
1680                  * We are done in pre/post. The skb will get processed
1681                  * further alter.
1682                  */
1683                 res = (info->match ^ info->invert);
1684                 goto ret_res;
1685                 break;
1686         /* default: Fall through and do UID related work */
1687         }
1688
1689         sk = skb->sk;
1690         /*
1691          * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1692          * "struct inet_timewait_sock" which is missing fields.
1693          * So we ignore it.
1694          */
1695         if (sk && sk->sk_state == TCP_TIME_WAIT)
1696                 sk = NULL;
1697         if (sk == NULL) {
1698                 /*
1699                  * A missing sk->sk_socket happens when packets are in-flight
1700                  * and the matching socket is already closed and gone.
1701                  */
1702                 sk = qtaguid_find_sk(skb, par);
1703                 /*
1704                  * If we got the socket from the find_sk(), we will need to put
1705                  * it back, as nf_tproxy_get_sock_v4() got it.
1706                  */
1707                 got_sock = sk;
1708                 if (sk)
1709                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1710                 else
1711                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1712         } else {
1713                 atomic64_inc(&qtu_events.match_found_sk);
1714         }
1715         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1716                  par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1717         if (sk != NULL) {
1718                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1719                         par->hooknum, sk, sk->sk_socket,
1720                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1721                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1722                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1723                         par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
1724         }
1725
1726         if (sk == NULL || sk->sk_socket == NULL) {
1727                 /*
1728                  * Here, the qtaguid_find_sk() using connection tracking
1729                  * couldn't find the owner, so for now we just count them
1730                  * against the system.
1731                  */
1732                 /*
1733                  * TODO: unhack how to force just accounting.
1734                  * For now we only do iface stats when the uid-owner is not
1735                  * requested.
1736                  */
1737                 if (!(info->match & XT_QTAGUID_UID))
1738                         account_for_uid(skb, sk, 0, par);
1739                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1740                         par->hooknum,
1741                         sk ? sk->sk_socket : NULL);
1742                 res = (info->match ^ info->invert) == 0;
1743                 atomic64_inc(&qtu_events.match_no_sk);
1744                 goto put_sock_ret_res;
1745         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1746                 res = false;
1747                 goto put_sock_ret_res;
1748         }
1749         filp = sk->sk_socket->file;
1750         if (filp == NULL) {
1751                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1752                 account_for_uid(skb, sk, 0, par);
1753                 res = ((info->match ^ info->invert) &
1754                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1755                 atomic64_inc(&qtu_events.match_no_sk_file);
1756                 goto put_sock_ret_res;
1757         }
1758         sock_uid = filp->f_cred->fsuid;
1759         /*
1760          * TODO: unhack how to force just accounting.
1761          * For now we only do iface stats when the uid-owner is not requested
1762          */
1763         if (!(info->match & XT_QTAGUID_UID))
1764                 account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid), par);
1765
1766         /*
1767          * The following two tests fail the match when:
1768          *    id not in range AND no inverted condition requested
1769          * or id     in range AND    inverted condition requested
1770          * Thus (!a && b) || (a && !b) == a ^ b
1771          */
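        /*
         * Worked example for the checks below (illustrative, not from the
         * original source).  With a = "id within [min, max]" and
         * b = "inverted condition requested", the fail test is (a) ^ !b:
         *   a=0 b=0: 0 ^ 1 = 1 -> fail  (out of range, no inversion)
         *   a=1 b=0: 1 ^ 1 = 0 -> keep matching
         *   a=1 b=1: 1 ^ 0 = 1 -> fail  (in range, but inverted)
         *   a=0 b=1: 0 ^ 0 = 0 -> keep matching
         */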
1772         if (info->match & XT_QTAGUID_UID) {
1773                 kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
1774                 kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);
1775
1776                 if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
1777                      uid_lte(filp->f_cred->fsuid, uid_max)) ^
1778                     !(info->invert & XT_QTAGUID_UID)) {
1779                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1780                                  par->hooknum);
1781                         res = false;
1782                         goto put_sock_ret_res;
1783                 }
1784         }
1785         if (info->match & XT_QTAGUID_GID) {
1786                 kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
1787                 kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);
1788
1789                 if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
1790                      gid_lte(filp->f_cred->fsgid, gid_max)) ^
1791                     !(info->invert & XT_QTAGUID_GID)) {
1792                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1793                                 par->hooknum);
1794                         res = false;
1795                         goto put_sock_ret_res;
1796                 }
1797         }
1798         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1799         res = true;
1800
1801 put_sock_ret_res:
1802         if (got_sock)
1803                 xt_socket_put_sk(sk);
1804 ret_res:
1805         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1806         return res;
1807 }
1808
1809 #ifdef DDEBUG
1810 /* This function is not in xt_qtaguid_print.c because of locks visibility */
1811 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1812 {
1813         va_list args;
1814         char *fmt_buff;
1815         char *buff;
1816
1817         if (likely(!(qtaguid_debug_mask & DDEBUG_MASK)))
1818                 return;
1819
1820         fmt_buff = kasprintf(GFP_ATOMIC,
1821                              "qtaguid: %s(): %s {\n", __func__, fmt);
1822         BUG_ON(!fmt_buff);
1823         va_start(args, fmt);
1824         buff = kvasprintf(GFP_ATOMIC,
1825                           fmt_buff, args);
1826         BUG_ON(!buff);
1827         pr_debug("%s", buff);
1828         kfree(fmt_buff);
1829         kfree(buff);
1830         va_end(args);
1831
1832         spin_lock_bh(&sock_tag_list_lock);
1833         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1834         spin_unlock_bh(&sock_tag_list_lock);
1835
1836         spin_lock_bh(&sock_tag_list_lock);
1837         spin_lock_bh(&uid_tag_data_tree_lock);
1838         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1839         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1840         spin_unlock_bh(&uid_tag_data_tree_lock);
1841         spin_unlock_bh(&sock_tag_list_lock);
1842
1843         spin_lock_bh(&iface_stat_list_lock);
1844         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1845         spin_unlock_bh(&iface_stat_list_lock);
1846
1847         pr_debug("qtaguid: %s(): }\n", __func__);
1848 }
1849 #else
1850 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1851 #endif
1852
1853 struct proc_ctrl_print_info {
1854         struct sock *sk; /* socket found by reading to sk_pos */
1855         loff_t sk_pos;
1856 };
1857
1858 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1859 {
1860         struct proc_ctrl_print_info *pcpi = m->private;
1861         struct sock_tag *sock_tag_entry = v;
1862         struct rb_node *node;
1863
1864         (*pos)++;
1865
1866         if (!v || v == SEQ_START_TOKEN)
1867                 return NULL;
1868
1869         node = rb_next(&sock_tag_entry->sock_node);
1870         if (!node) {
1871                 pcpi->sk = NULL;
1872                 sock_tag_entry = SEQ_START_TOKEN;
1873         } else {
1874                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1875                 pcpi->sk = sock_tag_entry->sk;
1876         }
1877         pcpi->sk_pos = *pos;
1878         return sock_tag_entry;
1879 }
1880
1881 static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
1882 {
1883         struct proc_ctrl_print_info *pcpi = m->private;
1884         struct sock_tag *sock_tag_entry;
1885         struct rb_node *node;
1886
1887         spin_lock_bh(&sock_tag_list_lock);
1888
1889         if (unlikely(module_passive))
1890                 return NULL;
1891
1892         if (*pos == 0) {
1893                 pcpi->sk_pos = 0;
1894                 node = rb_first(&sock_tag_tree);
1895                 if (!node) {
1896                         pcpi->sk = NULL;
1897                         return SEQ_START_TOKEN;
1898                 }
1899                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1900                 pcpi->sk = sock_tag_entry->sk;
1901         } else {
1902                 sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
1903                                                 NULL) ?: SEQ_START_TOKEN;
1904                 if (*pos != pcpi->sk_pos) {
1905                         /* seq_read skipped a next call */
1906                         *pos = pcpi->sk_pos;
1907                         return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
1908                 }
1909         }
1910         return sock_tag_entry;
1911 }
1912
1913 static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
1914 {
1915         spin_unlock_bh(&sock_tag_list_lock);
1916 }
1917
1918 /*
1919  * Procfs reader to get all active socket tags using style "1)" as described in
1920  * fs/proc/generic.c
1921  */
1922 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1923 {
1924         struct sock_tag *sock_tag_entry = v;
1925         uid_t uid;
1926         long f_count;
1927
1928         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1929                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
1930
1931         if (sock_tag_entry != SEQ_START_TOKEN) {
1932                 uid = get_uid_from_tag(sock_tag_entry->tag);
1933                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1934                          "pid=%u\n",
1935                          sock_tag_entry->sk,
1936                          sock_tag_entry->tag,
1937                          uid,
1938                          sock_tag_entry->pid
1939                         );
1940                 f_count = atomic_long_read(
1941                         &sock_tag_entry->socket->file->f_count);
1942                 seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
1943                            "f_count=%lu\n",
1944                            sock_tag_entry->sk,
1945                            sock_tag_entry->tag, uid,
1946                            sock_tag_entry->pid, f_count);
1947         } else {
1948                 seq_printf(m, "events: sockets_tagged=%llu "
1949                            "sockets_untagged=%llu "
1950                            "counter_set_changes=%llu "
1951                            "delete_cmds=%llu "
1952                            "iface_events=%llu "
1953                            "match_calls=%llu "
1954                            "match_calls_prepost=%llu "
1955                            "match_found_sk=%llu "
1956                            "match_found_sk_in_ct=%llu "
1957                            "match_found_no_sk_in_ct=%llu "
1958                            "match_no_sk=%llu "
1959                            "match_no_sk_file=%llu\n",
1960                            (u64)atomic64_read(&qtu_events.sockets_tagged),
1961                            (u64)atomic64_read(&qtu_events.sockets_untagged),
1962                            (u64)atomic64_read(&qtu_events.counter_set_changes),
1963                            (u64)atomic64_read(&qtu_events.delete_cmds),
1964                            (u64)atomic64_read(&qtu_events.iface_events),
1965                            (u64)atomic64_read(&qtu_events.match_calls),
1966                            (u64)atomic64_read(&qtu_events.match_calls_prepost),
1967                            (u64)atomic64_read(&qtu_events.match_found_sk),
1968                            (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
1969                            (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1970                            (u64)atomic64_read(&qtu_events.match_no_sk),
1971                            (u64)atomic64_read(&qtu_events.match_no_sk_file));
1972
1973                 /* Count the following as part of the last item_index */
1974                 prdebug_full_state(0, "proc ctrl");
1975         }
1976
1977         return 0;
1978 }
1979
1980 /*
1981  * Delete socket tags, and stat tags associated with a given
1982  * accounting tag and uid.
1983  */
1984 static int ctrl_cmd_delete(const char *input)
1985 {
1986         char cmd;
1987         int uid_int;
1988         kuid_t uid;
1989         uid_t entry_uid;
1990         tag_t acct_tag;
1991         tag_t tag;
1992         int res, argc;
1993         struct iface_stat *iface_entry;
1994         struct rb_node *node;
1995         struct sock_tag *st_entry;
1996         struct rb_root st_to_free_tree = RB_ROOT;
1997         struct tag_stat *ts_entry;
1998         struct tag_counter_set *tcs_entry;
1999         struct tag_ref *tr_entry;
2000         struct uid_tag_data *utd_entry;
2001
2002         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
2003         uid = make_kuid(&init_user_ns, uid_int);
2004         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2005                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2006                  acct_tag, uid_int);
2007         if (argc < 2) {
2008                 res = -EINVAL;
2009                 goto err;
2010         }
2011         if (!valid_atag(acct_tag)) {
2012                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2013                 res = -EINVAL;
2014                 goto err;
2015         }
2016         if (argc < 3) {
2017                 uid = current_fsuid();
2018                 uid_int = from_kuid(&init_user_ns, uid);
2019         } else if (!can_impersonate_uid(uid)) {
2020                 pr_info("qtaguid: ctrl_delete(%s): "
2021                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2022                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2023                 res = -EPERM;
2024                 goto err;
2025         }
2026
2027         tag = combine_atag_with_uid(acct_tag, uid_int);
2028         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2029                  "looking for tag=0x%llx (uid=%u)\n",
2030                  input, tag, uid_int);
2031
2032         /* Delete socket tags */
2033         spin_lock_bh(&sock_tag_list_lock);
2034         node = rb_first(&sock_tag_tree);
2035         while (node) {
2036                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2037                 entry_uid = get_uid_from_tag(st_entry->tag);
2038                 node = rb_next(node);
2039                 if (entry_uid != uid_int)
2040                         continue;
2041
2042                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2043                          input, st_entry->tag, entry_uid);
2044
2045                 if (!acct_tag || st_entry->tag == tag) {
2046                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2047                         /* Can't sockfd_put() within spinlock, do it later. */
2048                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2049                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2050                         BUG_ON(tr_entry->num_sock_tags <= 0);
2051                         tr_entry->num_sock_tags--;
2052                         /*
2053                          * TODO: remove if, and start failing.
2054                          * This is a hack to work around the fact that in some
2055                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2056                          * and are trying to work around apps
2057                          * that didn't open the /dev/xt_qtaguid.
2058                          */
2059                         if (st_entry->list.next && st_entry->list.prev)
2060                                 list_del(&st_entry->list);
2061                 }
2062         }
2063         spin_unlock_bh(&sock_tag_list_lock);
2064
2065         sock_tag_tree_erase(&st_to_free_tree);
2066
2067         /* Delete tag counter-sets */
2068         spin_lock_bh(&tag_counter_set_list_lock);
2069         /* Counter sets are only on the uid tag, not full tag */
2070         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2071         if (tcs_entry) {
2072                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2073                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2074                          input,
2075                          tcs_entry->tn.tag,
2076                          get_uid_from_tag(tcs_entry->tn.tag),
2077                          tcs_entry->active_set);
2078                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2079                 kfree(tcs_entry);
2080         }
2081         spin_unlock_bh(&tag_counter_set_list_lock);
2082
2083         /*
2084          * If acct_tag is 0, then all entries belonging to uid are
2085          * erased.
2086          */
2087         spin_lock_bh(&iface_stat_list_lock);
2088         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2089                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2090                 node = rb_first(&iface_entry->tag_stat_tree);
2091                 while (node) {
2092                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2093                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2094                         node = rb_next(node);
2095
2096                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2097                                  "ts tag=0x%llx (uid=%u)\n",
2098                                  input, ts_entry->tn.tag, entry_uid);
2099
2100                         if (entry_uid != uid_int)
2101                                 continue;
2102                         if (!acct_tag || ts_entry->tn.tag == tag) {
2103                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2104                                          "erase ts: %s 0x%llx %u\n",
2105                                          input, iface_entry->ifname,
2106                                          get_atag_from_tag(ts_entry->tn.tag),
2107                                          entry_uid);
2108                                 rb_erase(&ts_entry->tn.node,
2109                                          &iface_entry->tag_stat_tree);
2110                                 kfree(ts_entry);
2111                         }
2112                 }
2113                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2114         }
2115         spin_unlock_bh(&iface_stat_list_lock);
2116
2117         /* Cleanup the uid_tag_data */
2118         spin_lock_bh(&uid_tag_data_tree_lock);
2119         node = rb_first(&uid_tag_data_tree);
2120         while (node) {
2121                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2122                 entry_uid = utd_entry->uid;
2123                 node = rb_next(node);
2124
2125                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2126                          "utd uid=%u\n",
2127                          input, entry_uid);
2128
2129                 if (entry_uid != uid_int)
2130                         continue;
2131                 /*
2132                  * Go over the tag_refs, and those that don't have
2133                  * sock_tags using them are freed.
2134                  */
2135                 put_tag_ref_tree(tag, utd_entry);
2136                 put_utd_entry(utd_entry);
2137         }
2138         spin_unlock_bh(&uid_tag_data_tree_lock);
2139
2140         atomic64_inc(&qtu_events.delete_cmds);
2141         res = 0;
2142
2143 err:
2144         return res;
2145 }
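/*
 * Illustrative "delete" command strings (derived from the sscanf format
 * above; the numeric values are made up):
 *
 *   "d 180388626432 10005"   delete stats for acct value 42 (42 << 32) owned
 *                            by uid 10005
 *   "d 0 10005"              acct_tag 0: delete everything owned by uid 10005
 *
 * When the uid argument is omitted, current_fsuid() is used.
 */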
2146
2147 static int ctrl_cmd_counter_set(const char *input)
2148 {
2149         char cmd;
2150         uid_t uid = 0;
2151         tag_t tag;
2152         int res, argc;
2153         struct tag_counter_set *tcs;
2154         int counter_set;
2155
2156         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2157         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2158                  "set=%d uid=%u\n", input, argc, cmd,
2159                  counter_set, uid);
2160         if (argc != 3) {
2161                 res = -EINVAL;
2162                 goto err;
2163         }
2164         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2165                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2166                         input);
2167                 res = -EINVAL;
2168                 goto err;
2169         }
2170         if (!can_manipulate_uids()) {
2171                 pr_info("qtaguid: ctrl_counterset(%s): "
2172                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2173                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2174                 res = -EPERM;
2175                 goto err;
2176         }
2177
2178         tag = make_tag_from_uid(uid);
2179         spin_lock_bh(&tag_counter_set_list_lock);
2180         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2181         if (!tcs) {
2182                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2183                 if (!tcs) {
2184                         spin_unlock_bh(&tag_counter_set_list_lock);
2185                         pr_err("qtaguid: ctrl_counterset(%s): "
2186                                "failed to alloc counter set\n",
2187                                input);
2188                         res = -ENOMEM;
2189                         goto err;
2190                 }
2191                 tcs->tn.tag = tag;
2192                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2193                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2194                          "(uid=%u) set=%d\n",
2195                          input, tag, get_uid_from_tag(tag), counter_set);
2196         }
2197         tcs->active_set = counter_set;
2198         spin_unlock_bh(&tag_counter_set_list_lock);
2199         atomic64_inc(&qtu_events.counter_set_changes);
2200         res = 0;
2201
2202 err:
2203         return res;
2204 }
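/*
 * Illustrative "counter set" command string (format per the sscanf above;
 * the values are made up): "s 1 10005" makes counter set 1 the active set
 * for uid 10005.  The set must lie in [0, IFS_MAX_COUNTER_SETS).
 */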
2205
2206 static int ctrl_cmd_tag(const char *input)
2207 {
2208         char cmd;
2209         int sock_fd = 0;
2210         kuid_t uid;
2211         unsigned int uid_int = 0;
2212         tag_t acct_tag = make_atag_from_value(0);
2213         tag_t full_tag;
2214         struct socket *el_socket;
2215         int res, argc;
2216         struct sock_tag *sock_tag_entry;
2217         struct tag_ref *tag_ref_entry;
2218         struct uid_tag_data *uid_tag_data_entry;
2219         struct proc_qtu_data *pqd_entry;
2220
2221         /* Unassigned args will get defaulted later. */
2222         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
2223         uid = make_kuid(&init_user_ns, uid_int);
2224         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2225                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2226                  acct_tag, uid_int);
2227         if (argc < 2) {
2228                 res = -EINVAL;
2229                 goto err;
2230         }
2231         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2232         if (!el_socket) {
2233                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2234                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2235                         input, sock_fd, res, current->pid, current->tgid,
2236                         from_kuid(&init_user_ns, current_fsuid()));
2237                 goto err;
2238         }
2239         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2240                  input, atomic_long_read(&el_socket->file->f_count),
2241                  el_socket->sk);
2242         if (argc < 3) {
2243                 acct_tag = make_atag_from_value(0);
2244         } else if (!valid_atag(acct_tag)) {
2245                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2246                 res = -EINVAL;
2247                 goto err_put;
2248         }
2249         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2250                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2251                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2252                  input, current->pid, current->tgid,
2253                  from_kuid(&init_user_ns, current_uid()),
2254                  from_kuid(&init_user_ns, current_euid()),
2255                  from_kuid(&init_user_ns, current_fsuid()),
2256                  from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
2257                  in_group_p(xt_qtaguid_ctrl_file->gid),
2258                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2259         if (argc < 4) {
2260                 uid = current_fsuid();
2261                 uid_int = from_kuid(&init_user_ns, uid);
2262         } else if (!can_impersonate_uid(uid)) {
2263                 pr_info("qtaguid: ctrl_tag(%s): "
2264                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2265                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2266                 res = -EPERM;
2267                 goto err_put;
2268         }
2269         full_tag = combine_atag_with_uid(acct_tag, uid_int);
2270
2271         spin_lock_bh(&sock_tag_list_lock);
2272         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2273         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2274         if (IS_ERR(tag_ref_entry)) {
2275                 res = PTR_ERR(tag_ref_entry);
2276                 spin_unlock_bh(&sock_tag_list_lock);
2277                 goto err_put;
2278         }
2279         tag_ref_entry->num_sock_tags++;
2280         if (sock_tag_entry) {
2281                 struct tag_ref *prev_tag_ref_entry;
2282
2283                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2284                          "st@%p ...->f_count=%ld\n",
2285                          input, el_socket->sk, sock_tag_entry,
2286                          atomic_long_read(&el_socket->file->f_count));
2287                 /*
2288                  * This is a re-tagging, so release the sock_fd that was
2289                  * locked at the time of the 1st tagging.
2290                  * There is still the ref from this call's sockfd_lookup() so
2291                  * it can be done within the spinlock.
2292                  */
2293                 sockfd_put(sock_tag_entry->socket);
2294                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2295                                                     &uid_tag_data_entry);
2296                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2297                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2298                 prev_tag_ref_entry->num_sock_tags--;
2299                 sock_tag_entry->tag = full_tag;
2300         } else {
2301                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2302                          input, el_socket->sk);
2303                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2304                                          GFP_ATOMIC);
2305                 if (!sock_tag_entry) {
2306                         pr_err("qtaguid: ctrl_tag(%s): "
2307                                "socket tag alloc failed\n",
2308                                input);
2309                         spin_unlock_bh(&sock_tag_list_lock);
2310                         res = -ENOMEM;
2311                         goto err_tag_unref_put;
2312                 }
2313                 sock_tag_entry->sk = el_socket->sk;
2314                 sock_tag_entry->socket = el_socket;
2315                 sock_tag_entry->pid = current->tgid;
2316                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
2317                 spin_lock_bh(&uid_tag_data_tree_lock);
2318                 pqd_entry = proc_qtu_data_tree_search(
2319                         &proc_qtu_data_tree, current->tgid);
2320                 /*
2321                  * TODO: remove if, and start failing.
2322                  * At first, we want to catch user-space code that is not
2323                  * opening the /dev/xt_qtaguid.
2324                  */
2325                 if (IS_ERR_OR_NULL(pqd_entry))
2326                         pr_warn_once(
2327                                 "qtaguid: %s(): "
2328                                 "User space forgot to open /dev/xt_qtaguid? "
2329                                 "pid=%u tgid=%u uid=%u\n", __func__,
2330                                 current->pid, current->tgid,
2331                                 from_kuid(&init_user_ns, current_fsuid()));
2332                 else
2333                         list_add(&sock_tag_entry->list,
2334                                  &pqd_entry->sock_tag_list);
2335                 spin_unlock_bh(&uid_tag_data_tree_lock);
2336
2337                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2338                 atomic64_inc(&qtu_events.sockets_tagged);
2339         }
2340         spin_unlock_bh(&sock_tag_list_lock);
2341         /* We keep the ref to the socket (file) until it is untagged */
2342         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2343                  input, sock_tag_entry,
2344                  atomic_long_read(&el_socket->file->f_count));
2345         return 0;
2346
2347 err_tag_unref_put:
2348         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2349         tag_ref_entry->num_sock_tags--;
2350         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2351 err_put:
2352         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2353                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2354         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2355         sockfd_put(el_socket);
2356         return res;
2357
2358 err:
2359         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2360         return res;
2361 }
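/*
 * Illustrative "tag" command strings (format per the sscanf above; the fd
 * and tag values are made up):
 *
 *   "t 12"                       tag socket fd 12 with acct_tag 0 for the
 *                                caller's own uid
 *   "t 12 180388626432 10005"    tag fd 12 with acct value 42 (42 << 32) on
 *                                behalf of uid 10005 (requires
 *                                can_impersonate_uid())
 *
 * The acct_tag argument is parsed as a decimal 64-bit value and must pass
 * valid_atag(), i.e. it is assumed to carry no bits in the uid half.
 */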
2362
2363 static int ctrl_cmd_untag(const char *input)
2364 {
2365         char cmd;
2366         int sock_fd = 0;
2367         struct socket *el_socket;
2368         int res, argc;
2369         struct sock_tag *sock_tag_entry;
2370         struct tag_ref *tag_ref_entry;
2371         struct uid_tag_data *utd_entry;
2372         struct proc_qtu_data *pqd_entry;
2373
2374         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2375         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2376                  input, argc, cmd, sock_fd);
2377         if (argc < 2) {
2378                 res = -EINVAL;
2379                 goto err;
2380         }
2381         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2382         if (!el_socket) {
2383                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2384                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2385                         input, sock_fd, res, current->pid, current->tgid,
2386                         from_kuid(&init_user_ns, current_fsuid()));
2387                 goto err;
2388         }
2389         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2390                  input, atomic_long_read(&el_socket->file->f_count),
2391                  el_socket->sk);
2392         spin_lock_bh(&sock_tag_list_lock);
2393         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2394         if (!sock_tag_entry) {
2395                 spin_unlock_bh(&sock_tag_list_lock);
2396                 res = -EINVAL;
2397                 goto err_put;
2398         }
2399         /*
2400          * The socket already belongs to the current process
2401          * so it can do whatever it wants to it.
2402          */
2403         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2404
2405         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2406         BUG_ON(!tag_ref_entry);
2407         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2408         spin_lock_bh(&uid_tag_data_tree_lock);
2409         pqd_entry = proc_qtu_data_tree_search(
2410                 &proc_qtu_data_tree, current->tgid);
2411         /*
2412          * TODO: remove if, and start failing.
2413          * At first, we want to catch user-space code that is not
2414          * opening the /dev/xt_qtaguid.
2415          */
2416         if (IS_ERR_OR_NULL(pqd_entry))
2417                 pr_warn_once("qtaguid: %s(): "
2418                              "User space forgot to open /dev/xt_qtaguid? "
2419                              "pid=%u tgid=%u uid=%u\n", __func__,
2420                              current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2421         else
2422                 list_del(&sock_tag_entry->list);
2423         spin_unlock_bh(&uid_tag_data_tree_lock);
2424         /*
2425          * We don't free tag_ref from the utd_entry here,
2426          * only during a cmd_delete().
2427          */
2428         tag_ref_entry->num_sock_tags--;
2429         spin_unlock_bh(&sock_tag_list_lock);
2430         /*
2431          * Release the sock_fd that was grabbed at tag time,
2432          * and once more for the sockfd_lookup() here.
2433          */
2434         sockfd_put(sock_tag_entry->socket);
2435         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2436                  input, sock_tag_entry,
2437                  atomic_long_read(&el_socket->file->f_count) - 1);
2438         sockfd_put(el_socket);
2439
2440         kfree(sock_tag_entry);
2441         atomic64_inc(&qtu_events.sockets_untagged);
2442
2443         return 0;
2444
2445 err_put:
2446         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2447                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2448         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2449         sockfd_put(el_socket);
2450         return res;
2451
2452 err:
2453         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2454         return res;
2455 }
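/*
 * Illustrative "untag" command string (format per the sscanf above; the fd
 * is made up): "u 12" removes the tag from socket fd 12 and drops the file
 * reference that was taken when the socket was tagged.
 */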
2456
2457 static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
2458 {
2459         char cmd;
2460         ssize_t res;
2461
2462         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2463                  input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2464
2465         cmd = input[0];
2466         /* Collect params for commands */
2467         switch (cmd) {
2468         case 'd':
2469                 res = ctrl_cmd_delete(input);
2470                 break;
2471
2472         case 's':
2473                 res = ctrl_cmd_counter_set(input);
2474                 break;
2475
2476         case 't':
2477                 res = ctrl_cmd_tag(input);
2478                 break;
2479
2480         case 'u':
2481                 res = ctrl_cmd_untag(input);
2482                 break;
2483
2484         default:
2485                 res = -EINVAL;
2486                 goto err;
2487         }
2488         if (!res)
2489                 res = count;
2490 err:
2491         CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
2492         return res;
2493 }
2494
2495 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2496 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2497                                    size_t count, loff_t *offp)
2498 {
2499         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2500
2501         if (unlikely(module_passive))
2502                 return count;
2503
2504         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2505                 return -EINVAL;
2506
2507         if (copy_from_user(input_buf, buffer, count))
2508                 return -EFAULT;
2509
2510         input_buf[count] = '\0';
2511         return qtaguid_ctrl_parse(input_buf, count);
2512 }
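/*
 * Minimal user-space sketch (illustrative only; it assumes the module's proc
 * tree is reachable as /proc/net/xt_qtaguid/ -- adjust the path to wherever
 * xt_qtaguid_ctrl_file is actually registered):
 *
 *   int ctrl = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);
 *   char buf[64];
 *   int len;
 *
 *   // tag socket 'sock_fd' with acct value 42 for uid 10005
 *   len = snprintf(buf, sizeof(buf), "t %d %llu %u",
 *                  sock_fd, (unsigned long long)42 << 32, 10005u);
 *   write(ctrl, buf, len);
 *
 *   // later, untag it again
 *   len = snprintf(buf, sizeof(buf), "u %d", sock_fd);
 *   write(ctrl, buf, len);
 *   close(ctrl);
 *
 * Each write() carries exactly one command, as parsed by qtaguid_ctrl_parse()
 * above.  A well-behaved client also keeps /dev/xt_qtaguid open, or it will
 * trip the pr_warn_once() in ctrl_cmd_tag()/ctrl_cmd_untag().
 */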
2513
2514 struct proc_print_info {
2515         struct iface_stat *iface_entry;
2516         int item_index;
2517         tag_t tag; /* tag found by reading to tag_pos */
2518         off_t tag_pos;
2519         int tag_item_index;
2520 };
2521
2522 static void pp_stats_header(struct seq_file *m)
2523 {
2524         seq_puts(m,
2525                  "idx iface acct_tag_hex uid_tag_int cnt_set "
2526                  "rx_bytes rx_packets "
2527                  "tx_bytes tx_packets "
2528                  "rx_tcp_bytes rx_tcp_packets "
2529                  "rx_udp_bytes rx_udp_packets "
2530                  "rx_other_bytes rx_other_packets "
2531                  "tx_tcp_bytes tx_tcp_packets "
2532                  "tx_udp_bytes tx_udp_packets "
2533                  "tx_other_bytes tx_other_packets\n");
2534 }
2535
2536 static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
2537                          int cnt_set)
2538 {
2539         int ret;
2540         struct data_counters *cnts;
2541         tag_t tag = ts_entry->tn.tag;
2542         uid_t stat_uid = get_uid_from_tag(tag);
2543         struct proc_print_info *ppi = m->private;
2544         /* Detailed tags are not available to everybody */
2545         if (get_atag_from_tag(tag) && !can_read_other_uid_stats(
2546                                                 make_kuid(&init_user_ns, stat_uid))) {
2547                 CT_DEBUG("qtaguid: stats line: "
2548                          "%s 0x%llx %u: insufficient priv "
2549                          "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2550                          ppi->iface_entry->ifname,
2551                          get_atag_from_tag(tag), stat_uid,
2552                          current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2553                          from_kgid(&init_user_ns, xt_qtaguid_stats_file->gid));
2554                 return 0;
2555         }
2556         ppi->item_index++;
2557         cnts = &ts_entry->counters;
2558         ret = seq_printf(m, "%d %s 0x%llx %u %u "
2559                 "%llu %llu "
2560                 "%llu %llu "
2561                 "%llu %llu "
2562                 "%llu %llu "
2563                 "%llu %llu "
2564                 "%llu %llu "
2565                 "%llu %llu "
2566                 "%llu %llu\n",
2567                 ppi->item_index,
2568                 ppi->iface_entry->ifname,
2569                 get_atag_from_tag(tag),
2570                 stat_uid,
2571                 cnt_set,
2572                 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2573                 dc_sum_packets(cnts, cnt_set, IFS_RX),
2574                 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2575                 dc_sum_packets(cnts, cnt_set, IFS_TX),
2576                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2577                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2578                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2579                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2580                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2581                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2582                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2583                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2584                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2585                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2586                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2587                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2588         return ret ?: 1;
2589 }
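/*
 * Illustrative output (one made-up data line in the layout printed above;
 * column order follows pp_stats_header()):
 *
 *   idx iface acct_tag_hex uid_tag_int cnt_set rx_bytes rx_packets ...
 *   2 wlan0 0x2a00000000 10005 0 1000 10 512 8 900 9 100 1 0 0 400 6 112 2 0 0
 *
 * Lines with a non-zero acct_tag are only emitted for readers allowed by
 * can_read_other_uid_stats().
 */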
2590
2591 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2592 {
2593         int ret;
2594         int counter_set;
2595         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2596              counter_set++) {
2597                 ret = pp_stats_line(m, ts_entry, counter_set);
2598                 if (ret < 0)
2599                         return false;
2600         }
2601         return true;
2602 }
2603
2604 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2605 {
2606         struct iface_stat *iface_entry;
2607
2608         if (!ptr)
2609                 return false;
2610
2611         list_for_each_entry(iface_entry, &iface_stat_list, list)
2612                 if (iface_entry == ptr)
2613                         return true;
2614         return false;
2615 }
2616
2617 static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
2618 {
2619         spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2620         list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
2621                 spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2622                 return;
2623         }
2624         ppi->iface_entry = NULL;
2625 }
2626
2627 static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
2628 {
2629         struct proc_print_info *ppi = m->private;
2630         struct tag_stat *ts_entry;
2631         struct rb_node *node;
2632
2633         if (!v) {
2634                 pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
2635                 return NULL;
2636         }
2637
2638         (*pos)++;
2639
2640         if (!ppi->iface_entry || unlikely(module_passive))
2641                 return NULL;
2642
2643         if (v == SEQ_START_TOKEN)
2644                 node = rb_first(&ppi->iface_entry->tag_stat_tree);
2645         else
2646                 node = rb_next(&((struct tag_stat *)v)->tn.node);
2647
2648         while (!node) {
2649                 qtaguid_stats_proc_next_iface_entry(ppi);
2650                 if (!ppi->iface_entry)
2651                         return NULL;
2652                 node = rb_first(&ppi->iface_entry->tag_stat_tree);
2653         }
2654
2655         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2656         ppi->tag = ts_entry->tn.tag;
2657         ppi->tag_pos = *pos;
2658         ppi->tag_item_index = ppi->item_index;
2659         return ts_entry;
2660 }
2661
2662 static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
2663 {
2664         struct proc_print_info *ppi = m->private;
2665         struct tag_stat *ts_entry = NULL;
2666
2667         spin_lock_bh(&iface_stat_list_lock);
2668
2669         if (*pos == 0) {
2670                 ppi->item_index = 1;
2671                 ppi->tag_pos = 0;
2672                 if (list_empty(&iface_stat_list)) {
2673                         ppi->iface_entry = NULL;
2674                 } else {
2675                         ppi->iface_entry = list_first_entry(&iface_stat_list,
2676                                                             struct iface_stat,
2677                                                             list);
2678                         spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2679                 }
2680                 return SEQ_START_TOKEN;
2681         }
2682         if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
2683                 if (ppi->iface_entry) {
2684                         pr_err("qtaguid: %s(): iface_entry %p not found\n",
2685                                __func__, ppi->iface_entry);
2686                         ppi->iface_entry = NULL;
2687                 }
2688                 return NULL;
2689         }
2690
2691         spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2692
2693         if (!ppi->tag_pos) {
2694                 /* seq_read skipped first next call */
2695                 ts_entry = SEQ_START_TOKEN;
2696         } else {
2697                 ts_entry = tag_stat_tree_search(
2698                                 &ppi->iface_entry->tag_stat_tree, ppi->tag);
2699                 if (!ts_entry) {
2700                         pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
2701                                 __func__, ppi->tag);
2702                         return NULL;
2703                 }
2704         }
2705
2706         if (*pos == ppi->tag_pos) { /* normal resume */
2707                 ppi->item_index = ppi->tag_item_index;
2708         } else {
2709                 /* seq_read skipped a next call */
2710                 *pos = ppi->tag_pos;
2711                 ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
2712         }
2713
2714         return ts_entry;
2715 }
2716
2717 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2718 {
2719         struct proc_print_info *ppi = m->private;
2720         if (ppi->iface_entry)
2721                 spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2722         spin_unlock_bh(&iface_stat_list_lock);
2723 }
2724
2725 /*
2726  * Procfs reader to get all tag stats using style "1)" as described in
2727  * fs/proc/generic.c
2728  * Groups all protocols tx/rx bytes.
2729  */
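/*
 * For illustration only (pp_stats_header()/pp_stats_line() produce the
 * authoritative layout), each stats line is roughly:
 *
 *   idx iface acct_tag_hex uid_tag_int cnt_set rx_bytes rx_packets
 *   tx_bytes tx_packets <per-protocol tcp/udp/other rx/tx counters>
 *
 * with one line per (iface, tag, counter_set) combination.
 */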
2730 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2731 {
2732         struct tag_stat *ts_entry = v;
2733
2734         if (v == SEQ_START_TOKEN)
2735                 pp_stats_header(m);
2736         else
2737                 pp_sets(m, ts_entry);
2738
2739         return 0;
2740 }
2741
2742 /*------------------------------------------*/
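/*
 * Misc character device interface. A process that tags sockets is expected
 * to keep an fd to this device open: qtudev_open() records the caller's tgid
 * in proc_qtu_data_tree, and qtudev_release() uses that record to drop any
 * socket tags the process left behind once the fd is closed (including when
 * the process dies and the kernel closes it on its behalf).
 */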
2743 static int qtudev_open(struct inode *inode, struct file *file)
2744 {
2745         struct uid_tag_data *utd_entry;
2746         struct proc_qtu_data  *pqd_entry;
2747         struct proc_qtu_data  *new_pqd_entry;
2748         int res;
2749         bool utd_entry_found;
2750
2751         if (unlikely(qtu_proc_handling_passive))
2752                 return 0;
2753
2754         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2755                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2756
2757         spin_lock_bh(&uid_tag_data_tree_lock);
2758
2759         /* Look for existing uid data, or alloc one. */
2760         utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
2761         if (IS_ERR_OR_NULL(utd_entry)) {
2762                 res = PTR_ERR(utd_entry);
2763                 goto err_unlock;
2764         }
2765
2766         /* Look for existing PID based proc_data */
2767         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2768                                               current->tgid);
2769         if (pqd_entry) {
2770                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2771                        "%s already opened\n",
2772                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2773                        QTU_DEV_NAME);
2774                 res = -EBUSY;
2775                 goto err_unlock_free_utd;
2776         }
2777
2778         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2779         if (!new_pqd_entry) {
2780                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2781                        "proc data alloc failed\n",
2782                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2783                 res = -ENOMEM;
2784                 goto err_unlock_free_utd;
2785         }
2786         new_pqd_entry->pid = current->tgid;
2787         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2788         new_pqd_entry->parent_tag_data = utd_entry;
2789         utd_entry->num_pqd++;
2790
2791         proc_qtu_data_tree_insert(new_pqd_entry,
2792                                   &proc_qtu_data_tree);
2793
2794         spin_unlock_bh(&uid_tag_data_tree_lock);
2795         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2796                  from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
2797         file->private_data = new_pqd_entry;
2798         return 0;
2799
2800 err_unlock_free_utd:
2801         if (!utd_entry_found) {
2802                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2803                 kfree(utd_entry);
2804         }
2805 err_unlock:
2806         spin_unlock_bh(&uid_tag_data_tree_lock);
2807         return res;
2808 }
2809
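/*
 * Close handler: undo what qtudev_open() set up for this process and release
 * the socket tags still listed under its proc_qtu_data. Lock order is
 * sock_tag_list_lock then uid_tag_data_tree_lock; sockets queued on
 * st_to_free_tree are only released via sock_tag_tree_erase() after both
 * locks are dropped, since sockfd_put() cannot be called under a spinlock.
 */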
2810 static int qtudev_release(struct inode *inode, struct file *file)
2811 {
2812         struct proc_qtu_data  *pqd_entry = file->private_data;
2813         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2814         struct sock_tag *st_entry;
2815         struct rb_root st_to_free_tree = RB_ROOT;
2816         struct list_head *entry, *next;
2817         struct tag_ref *tr;
2818
2819         if (unlikely(qtu_proc_handling_passive))
2820                 return 0;
2821
2822         /*
2823          * Do not trust the current->pid, it might just be a kworker cleaning
2824          * up after a dead proc.
2825          */
2826         DR_DEBUG("qtaguid: qtudev_release(): "
2827                  "pid=%u tgid=%u uid=%u "
2828                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2829                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2830                  pqd_entry, pqd_entry->pid, utd_entry,
2831                  utd_entry->num_active_tags);
2832
2833         spin_lock_bh(&sock_tag_list_lock);
2834         spin_lock_bh(&uid_tag_data_tree_lock);
2835
2836         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2837                 st_entry = list_entry(entry, struct sock_tag, list);
2838                 DR_DEBUG("qtaguid: %s(): "
2839                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2840                          __func__,
2841                          st_entry, st_entry->sk,
2842                          current->pid, current->tgid,
2843                          pqd_entry->parent_tag_data->uid);
2844
2845                 utd_entry = uid_tag_data_tree_search(
2846                         &uid_tag_data_tree,
2847                         get_uid_from_tag(st_entry->tag));
2848                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2849                 DR_DEBUG("qtaguid: %s(): "
2850                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2851                          st_entry->tag, utd_entry);
2852                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2853                                          st_entry->tag);
2854                 BUG_ON(!tr);
2855                 BUG_ON(tr->num_sock_tags <= 0);
2856                 tr->num_sock_tags--;
2857                 free_tag_ref_from_utd_entry(tr, utd_entry);
2858
2859                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2860                 list_del(&st_entry->list);
2861                 /* Can't sockfd_put() within spinlock, do it later. */
2862                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2863
2864                 /*
2865                  * Try to free the utd_entry if no other proc_qtu_data is
2866                  * using it (num_pqd is 0) and it doesn't have active tags
2867                  * (num_active_tags is 0).
2868                  */
2869                 put_utd_entry(utd_entry);
2870         }
2871
2872         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2873         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2874         pqd_entry->parent_tag_data->num_pqd--;
2875         put_utd_entry(pqd_entry->parent_tag_data);
2876         kfree(pqd_entry);
2877         file->private_data = NULL;
2878
2879         spin_unlock_bh(&uid_tag_data_tree_lock);
2880         spin_unlock_bh(&sock_tag_list_lock);
2881
2883         sock_tag_tree_erase(&st_to_free_tree);
2884
2885         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2886                            current->pid, current->tgid);
2887         return 0;
2888 }
2889
2890 /*------------------------------------------*/
2891 static const struct file_operations qtudev_fops = {
2892         .owner = THIS_MODULE,
2893         .open = qtudev_open,
2894         .release = qtudev_release,
2895 };
2896
2897 static struct miscdevice qtu_device = {
2898         .minor = MISC_DYNAMIC_MINOR,
2899         .name = QTU_DEV_NAME,
2900         .fops = &qtudev_fops,
2901         /* Sadly there is no way to set a default mode: .mode = S_IRUGO | S_IWUSR */
2902 };
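/*
 * Minimal user-space sketch of the expected usage of this device (the node
 * path is an assumption here; QTU_DEV_NAME is defined in the qtaguid headers
 * and the actual node depends on how /dev is populated):
 *
 *      // Open once at startup and keep the fd for the process lifetime so
 *      // the kernel can associate this tgid with its socket tags and clean
 *      // them up in qtudev_release() when the process exits.
 *      int qtu_fd = open("/dev/xt_qtaguid", O_RDONLY);
 *      if (qtu_fd < 0)
 *              perror("open xt_qtaguid");
 *      // ... tag sockets via the ctrl proc file, do work ...
 *      // close(qtu_fd), or process exit, triggers the cleanup above.
 */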
2903
2904 static const struct seq_operations proc_qtaguid_ctrl_seqops = {
2905         .start = qtaguid_ctrl_proc_start,
2906         .next = qtaguid_ctrl_proc_next,
2907         .stop = qtaguid_ctrl_proc_stop,
2908         .show = qtaguid_ctrl_proc_show,
2909 };
2910
2911 static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
2912 {
2913         return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
2914                                 sizeof(struct proc_ctrl_print_info));
2915 }
2916
2917 static const struct file_operations proc_qtaguid_ctrl_fops = {
2918         .open           = proc_qtaguid_ctrl_open,
2919         .read           = seq_read,
2920         .write          = qtaguid_ctrl_proc_write,
2921         .llseek         = seq_lseek,
2922         .release        = seq_release_private,
2923 };
2924
2925 static const struct seq_operations proc_qtaguid_stats_seqops = {
2926         .start = qtaguid_stats_proc_start,
2927         .next = qtaguid_stats_proc_next,
2928         .stop = qtaguid_stats_proc_stop,
2929         .show = qtaguid_stats_proc_show,
2930 };
2931
2932 static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
2933 {
2934         return seq_open_private(file, &proc_qtaguid_stats_seqops,
2935                                 sizeof(struct proc_print_info));
2936 }
2937
2938 static const struct file_operations proc_qtaguid_stats_fops = {
2939         .open           = proc_qtaguid_stats_open,
2940         .read           = seq_read,
2941         .llseek         = seq_lseek,
2942         .release        = seq_release_private,
2943 };
2944
2945 /*------------------------------------------*/
2946 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2947 {
2948         int ret;
2949         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2950         if (!*res_procdir) {
2951                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2952                 ret = -ENOMEM;
2953                 goto no_dir;
2954         }
2955
2956         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2957                                                 *res_procdir,
2958                                                 &proc_qtaguid_ctrl_fops,
2959                                                 NULL);
2960         if (!xt_qtaguid_ctrl_file) {
2961                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2962                        "file\n");
2963                 ret = -ENOMEM;
2964                 goto no_ctrl_entry;
2965         }
2966
2967         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2968                                                  *res_procdir,
2969                                                  &proc_qtaguid_stats_fops,
2970                                                  NULL);
2971         if (!xt_qtaguid_stats_file) {
2972                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2973                         "file\n");
2974                 ret = -ENOMEM;
2975                 goto no_stats_entry;
2976         }
2977         /*
2978          * TODO: add support for counter hacking
2979          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2980          */
2981         return 0;
2982
2983 no_stats_entry:
2984         remove_proc_entry("ctrl", *res_procdir);
2985 no_ctrl_entry:
2986         remove_proc_entry("xt_qtaguid", NULL);
2987 no_dir:
2988         return ret;
2989 }
2990
2991 static struct xt_match qtaguid_mt_reg __read_mostly = {
2992         /*
2993          * This module masquerades as the "owner" module so that iptables
2994          * tools can deal with it.
2995          */
2996         .name       = "owner",
2997         .revision   = 1,
2998         .family     = NFPROTO_UNSPEC,
2999         .match      = qtaguid_mt,
3000         .matchsize  = sizeof(struct xt_qtaguid_match_info),
3001         .me         = THIS_MODULE,
3002 };
3003
3004 static int __init qtaguid_mt_init(void)
3005 {
3006         if (qtaguid_proc_register(&xt_qtaguid_procdir)
3007             || iface_stat_init(xt_qtaguid_procdir)
3008             || xt_register_match(&qtaguid_mt_reg)
3009             || misc_register(&qtu_device))
3010                 return -1;
3011         return 0;
3012 }
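/*
 * Note: if any of the registrations above fails, the ones that already
 * succeeded are not unwound here; the module load simply fails.
 */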
3013
3014 /*
3015  * TODO: allow unloading of the module.
3016  * For now stats are permanent.
3017  * Kconfig forces 'y'/'n' and never 'm'.
3018  */
3019
3020 module_init(qtaguid_mt_init);
3021 MODULE_AUTHOR("jpa <jpa@google.com>");
3022 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3023 MODULE_LICENSE("GPL");
3024 MODULE_ALIAS("ipt_owner");
3025 MODULE_ALIAS("ip6t_owner");
3026 MODULE_ALIAS("ipt_qtaguid");
3027 MODULE_ALIAS("ip6t_qtaguid");