netfilter: xt_qtaguid: xt_socket: build fixes
[firefly-linux-kernel-4.4.55.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/miscdevice.h>
21 #include <linux/netfilter/x_tables.h>
22 #include <linux/netfilter/xt_qtaguid.h>
23 #include <linux/ratelimit.h>
24 #include <linux/seq_file.h>
25 #include <linux/skbuff.h>
26 #include <linux/workqueue.h>
27 #include <net/addrconf.h>
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/udp.h>
31
32 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
33 #include <linux/netfilter_ipv6/ip6_tables.h>
34 #endif
35
36 #include <linux/netfilter/xt_socket.h>
37 #include "xt_qtaguid_internal.h"
38 #include "xt_qtaguid_print.h"
39 #include "../../fs/proc/internal.h"
40
/*
 * We only use the xt_socket funcs within a similar context to avoid unexpected
 * return values.
 */
#define XT_SOCKET_SUPPORTED_HOOKS \
	((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))


/* /proc/net/xt_qtaguid/ directory and its entries. */
static const char *module_procdirname = "xt_qtaguid";
static struct proc_dir_entry *xt_qtaguid_procdir;

static unsigned int proc_iface_perms = S_IRUGO;
module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_stats_file;
static unsigned int proc_stats_perms = S_IRUGO;
module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_ctrl_file;

/* Everybody can write. But proc_ctrl_write_limited is true by default which
 * limits what can be controlled. See the can_*() functions.
 */
static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);

/* Limited by default, so the gid of the ctrl and stats proc entries
 * will limit what can be done. See the can_*() functions.
 */
static bool proc_stats_readall_limited = true;
static bool proc_ctrl_write_limited = true;

module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
		   S_IRUGO | S_IWUSR);
module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
		   S_IRUGO | S_IWUSR);

/*
 * Limit the number of active tags (via socket tags) for a given UID.
 * Multiple processes could share the UID.
 */
static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);

/*
 * After the kernel has initialized this module, it is still possible
 * to make it passive.
 * Setting passive to Y:
 *  - the iface stats handling will not act on notifications.
 *  - iptables matches will never match.
 *  - ctrl commands silently succeed.
 *  - stats are always empty.
 * This is mostly useful when a bug is suspected.
 */
static bool module_passive;
module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);

/*
 * Control how qtaguid data is tracked per proc/uid.
 * Setting tag_tracking_passive to Y:
 *  - don't create proc specific structs to track tags
 *  - don't check that active tag stats exceed some limits.
 *  - don't clean up socket tags on process exits.
 * This is mostly useful when a bug is suspected.
 */
static bool qtu_proc_handling_passive;
module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
		   S_IRUGO | S_IWUSR);

/* Name of the misc char device used by userspace (see QTU ioctls). */
#define QTU_DEV_NAME "xt_qtaguid"

uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);

/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
/*
 * The iface_stat_all* entries will go away once userspace gets used to the
 * new fields that have a format line.
 */
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
static struct proc_dir_entry *iface_stat_fmt_procfile;


/* All tracked interfaces; guarded by iface_stat_list_lock. */
static LIST_HEAD(iface_stat_list);
static DEFINE_SPINLOCK(iface_stat_list_lock);

/* sock_tag entries keyed by struct sock pointer; guarded by sock_tag_list_lock. */
static struct rb_root sock_tag_tree = RB_ROOT;
static DEFINE_SPINLOCK(sock_tag_list_lock);

static struct rb_root tag_counter_set_tree = RB_ROOT;
static DEFINE_SPINLOCK(tag_counter_set_list_lock);

/* Per-UID tag data keyed by uid; guarded by uid_tag_data_tree_lock. */
static struct rb_root uid_tag_data_tree = RB_ROOT;
static DEFINE_SPINLOCK(uid_tag_data_tree_lock);

static struct rb_root proc_qtu_data_tree = RB_ROOT;
/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */

static struct qtaguid_event_counts qtu_events;
144 /*----------------------------------------------*/
145 static bool can_manipulate_uids(void)
146 {
147         /* root pwnd */
148         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
149                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
150                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
151 }
152
153 static bool can_impersonate_uid(kuid_t uid)
154 {
155         return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
156 }
157
158 static bool can_read_other_uid_stats(kuid_t uid)
159 {
160         /* root pwnd */
161         return in_egroup_p(xt_qtaguid_stats_file->gid)
162                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
163                 || unlikely(!proc_stats_readall_limited)
164                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
165 }
166
167 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
168                                   enum ifs_tx_rx direction,
169                                   enum ifs_proto ifs_proto,
170                                   int bytes,
171                                   int packets)
172 {
173         counters->bpc[set][direction][ifs_proto].bytes += bytes;
174         counters->bpc[set][direction][ifs_proto].packets += packets;
175 }
176
177 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
178 {
179         struct rb_node *node = root->rb_node;
180
181         while (node) {
182                 struct tag_node *data = rb_entry(node, struct tag_node, node);
183                 int result;
184                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
185                          " node=%p data=%p\n", tag, node, data);
186                 result = tag_compare(tag, data->tag);
187                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
188                          " data.tag=0x%llx (uid=%u) res=%d\n",
189                          tag, data->tag, get_uid_from_tag(data->tag), result);
190                 if (result < 0)
191                         node = node->rb_left;
192                 else if (result > 0)
193                         node = node->rb_right;
194                 else
195                         return data;
196         }
197         return NULL;
198 }
199
200 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
201 {
202         struct rb_node **new = &(root->rb_node), *parent = NULL;
203
204         /* Figure out where to put new node */
205         while (*new) {
206                 struct tag_node *this = rb_entry(*new, struct tag_node,
207                                                  node);
208                 int result = tag_compare(data->tag, this->tag);
209                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
210                          " (uid=%u)\n", __func__,
211                          this->tag,
212                          get_uid_from_tag(this->tag));
213                 parent = *new;
214                 if (result < 0)
215                         new = &((*new)->rb_left);
216                 else if (result > 0)
217                         new = &((*new)->rb_right);
218                 else
219                         BUG();
220         }
221
222         /* Add new node and rebalance tree. */
223         rb_link_node(&data->node, parent, new);
224         rb_insert_color(&data->node, root);
225 }
226
/* Insert a tag_stat into root, keyed by its embedded tag_node. */
static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
231
232 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
233 {
234         struct tag_node *node = tag_node_tree_search(root, tag);
235         if (!node)
236                 return NULL;
237         return rb_entry(&node->node, struct tag_stat, tn.node);
238 }
239
/* Insert a tag_counter_set into root, keyed by its embedded tag_node. */
static void tag_counter_set_tree_insert(struct tag_counter_set *data,
					struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
245
246 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
247                                                            tag_t tag)
248 {
249         struct tag_node *node = tag_node_tree_search(root, tag);
250         if (!node)
251                 return NULL;
252         return rb_entry(&node->node, struct tag_counter_set, tn.node);
253
254 }
255
/* Insert a tag_ref into root, keyed by its embedded tag_node. */
static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
{
	tag_node_tree_insert(&data->tn, root);
}
260
261 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
262 {
263         struct tag_node *node = tag_node_tree_search(root, tag);
264         if (!node)
265                 return NULL;
266         return rb_entry(&node->node, struct tag_ref, tn.node);
267 }
268
269 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
270                                              const struct sock *sk)
271 {
272         struct rb_node *node = root->rb_node;
273
274         while (node) {
275                 struct sock_tag *data = rb_entry(node, struct sock_tag,
276                                                  sock_node);
277                 if (sk < data->sk)
278                         node = node->rb_left;
279                 else if (sk > data->sk)
280                         node = node->rb_right;
281                 else
282                         return data;
283         }
284         return NULL;
285 }
286
287 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
288 {
289         struct rb_node **new = &(root->rb_node), *parent = NULL;
290
291         /* Figure out where to put new node */
292         while (*new) {
293                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
294                                                  sock_node);
295                 parent = *new;
296                 if (data->sk < this->sk)
297                         new = &((*new)->rb_left);
298                 else if (data->sk > this->sk)
299                         new = &((*new)->rb_right);
300                 else
301                         BUG();
302         }
303
304         /* Add new node and rebalance tree. */
305         rb_link_node(&data->sock_node, parent, new);
306         rb_insert_color(&data->sock_node, root);
307 }
308
309 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
310 {
311         struct rb_node *node;
312         struct sock_tag *st_entry;
313
314         node = rb_first(st_to_free_tree);
315         while (node) {
316                 st_entry = rb_entry(node, struct sock_tag, sock_node);
317                 node = rb_next(node);
318                 CT_DEBUG("qtaguid: %s(): "
319                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
320                          st_entry->sk,
321                          st_entry->tag,
322                          get_uid_from_tag(st_entry->tag));
323                 rb_erase(&st_entry->sock_node, st_to_free_tree);
324                 sockfd_put(st_entry->socket);
325                 kfree(st_entry);
326         }
327 }
328
329 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
330                                                        const pid_t pid)
331 {
332         struct rb_node *node = root->rb_node;
333
334         while (node) {
335                 struct proc_qtu_data *data = rb_entry(node,
336                                                       struct proc_qtu_data,
337                                                       node);
338                 if (pid < data->pid)
339                         node = node->rb_left;
340                 else if (pid > data->pid)
341                         node = node->rb_right;
342                 else
343                         return data;
344         }
345         return NULL;
346 }
347
348 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
349                                       struct rb_root *root)
350 {
351         struct rb_node **new = &(root->rb_node), *parent = NULL;
352
353         /* Figure out where to put new node */
354         while (*new) {
355                 struct proc_qtu_data *this = rb_entry(*new,
356                                                       struct proc_qtu_data,
357                                                       node);
358                 parent = *new;
359                 if (data->pid < this->pid)
360                         new = &((*new)->rb_left);
361                 else if (data->pid > this->pid)
362                         new = &((*new)->rb_right);
363                 else
364                         BUG();
365         }
366
367         /* Add new node and rebalance tree. */
368         rb_link_node(&data->node, parent, new);
369         rb_insert_color(&data->node, root);
370 }
371
372 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
373                                      struct rb_root *root)
374 {
375         struct rb_node **new = &(root->rb_node), *parent = NULL;
376
377         /* Figure out where to put new node */
378         while (*new) {
379                 struct uid_tag_data *this = rb_entry(*new,
380                                                      struct uid_tag_data,
381                                                      node);
382                 parent = *new;
383                 if (data->uid < this->uid)
384                         new = &((*new)->rb_left);
385                 else if (data->uid > this->uid)
386                         new = &((*new)->rb_right);
387                 else
388                         BUG();
389         }
390
391         /* Add new node and rebalance tree. */
392         rb_link_node(&data->node, parent, new);
393         rb_insert_color(&data->node, root);
394 }
395
396 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
397                                                      uid_t uid)
398 {
399         struct rb_node *node = root->rb_node;
400
401         while (node) {
402                 struct uid_tag_data *data = rb_entry(node,
403                                                      struct uid_tag_data,
404                                                      node);
405                 if (uid < data->uid)
406                         node = node->rb_left;
407                 else if (uid > data->uid)
408                         node = node->rb_right;
409                 else
410                         return data;
411         }
412         return NULL;
413 }
414
415 /*
416  * Allocates a new uid_tag_data struct if needed.
417  * Returns a pointer to the found or allocated uid_tag_data.
418  * Returns a PTR_ERR on failures, and lock is not held.
419  * If found is not NULL:
420  *   sets *found to true if not allocated.
421  *   sets *found to false if allocated.
422  */
423 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
424 {
425         struct uid_tag_data *utd_entry;
426
427         /* Look for top level uid_tag_data for the UID */
428         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
429         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
430
431         if (found_res)
432                 *found_res = utd_entry;
433         if (utd_entry)
434                 return utd_entry;
435
436         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
437         if (!utd_entry) {
438                 pr_err("qtaguid: get_uid_data(%u): "
439                        "tag data alloc failed\n", uid);
440                 return ERR_PTR(-ENOMEM);
441         }
442
443         utd_entry->uid = uid;
444         utd_entry->tag_ref_tree = RB_ROOT;
445         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
446         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
447         return utd_entry;
448 }
449
450 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
451 static struct tag_ref *new_tag_ref(tag_t new_tag,
452                                    struct uid_tag_data *utd_entry)
453 {
454         struct tag_ref *tr_entry;
455         int res;
456
457         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
458                 pr_info("qtaguid: new_tag_ref(0x%llx): "
459                         "tag ref alloc quota exceeded. max=%d\n",
460                         new_tag, max_sock_tags);
461                 res = -EMFILE;
462                 goto err_res;
463
464         }
465
466         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
467         if (!tr_entry) {
468                 pr_err("qtaguid: new_tag_ref(0x%llx): "
469                        "tag ref alloc failed\n",
470                        new_tag);
471                 res = -ENOMEM;
472                 goto err_res;
473         }
474         tr_entry->tn.tag = new_tag;
475         /* tr_entry->num_sock_tags  handled by caller */
476         utd_entry->num_active_tags++;
477         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
478         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
479                  " inserted new tag ref %p\n",
480                  new_tag, tr_entry);
481         return tr_entry;
482
483 err_res:
484         return ERR_PTR(res);
485 }
486
487 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
488                                       struct uid_tag_data **utd_res)
489 {
490         struct uid_tag_data *utd_entry;
491         struct tag_ref *tr_entry;
492         bool found_utd;
493         uid_t uid = get_uid_from_tag(full_tag);
494
495         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
496                  full_tag, uid);
497
498         utd_entry = get_uid_data(uid, &found_utd);
499         if (IS_ERR_OR_NULL(utd_entry)) {
500                 if (utd_res)
501                         *utd_res = utd_entry;
502                 return NULL;
503         }
504
505         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
506         if (utd_res)
507                 *utd_res = utd_entry;
508         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
509                  full_tag, utd_entry, tr_entry);
510         return tr_entry;
511 }
512
/*
 * Look up the tag_ref for full_tag, creating it when absent.
 * Never returns NULL. Either PTR_ERR or a valid ptr.
 * Takes and releases uid_tag_data_tree_lock itself — do not call with
 * that lock already held.
 */
static struct tag_ref *get_tag_ref(tag_t full_tag,
				   struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;

	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
		 full_tag);
	spin_lock_bh(&uid_tag_data_tree_lock);
	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
	/* uid data alloc failure is treated as fatal here by design */
	BUG_ON(IS_ERR_OR_NULL(utd_entry));
	if (!tr_entry)
		tr_entry = new_tag_ref(full_tag, utd_entry);

	spin_unlock_bh(&uid_tag_data_tree_lock);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}
535
/*
 * Checks and maybe frees the UID Tag Data entry.
 * The entry is freed only when no tag refs and no proc_qtu_data
 * references remain; otherwise it is left in the tree.
 * Presumably called with uid_tag_data_tree_lock held — confirm at callers.
 */
static void put_utd_entry(struct uid_tag_data *utd_entry)
{
	/* Are we done with the UID tag data entry? */
	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
		!utd_entry->num_pqd) {
		DR_DEBUG("qtaguid: %s(): "
			 "erase utd_entry=%p uid=%u "
			 "by pid=%u tgid=%u uid=%u\n", __func__,
			 utd_entry, utd_entry->uid,
			 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
		/* an empty ref tree must mean zero active tags */
		BUG_ON(utd_entry->num_active_tags);
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
		kfree(utd_entry);
	} else {
		DR_DEBUG("qtaguid: %s(): "
			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
			 __func__, utd_entry, utd_entry->num_active_tags,
			 utd_entry->num_pqd);
		/* a kept entry must still be referenced by something */
		BUG_ON(!(utd_entry->num_active_tags ||
			 utd_entry->num_pqd));
	}
}
559
560 /*
561  * If no sock_tags are using this tag_ref,
562  * decrements refcount of utd_entry, removes tr_entry
563  * from utd_entry->tag_ref_tree and frees.
564  */
565 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
566                                         struct uid_tag_data *utd_entry)
567 {
568         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
569                  tr_entry, tr_entry->tn.tag,
570                  get_uid_from_tag(tr_entry->tn.tag));
571         if (!tr_entry->num_sock_tags) {
572                 BUG_ON(!utd_entry->num_active_tags);
573                 utd_entry->num_active_tags--;
574                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
575                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
576                 kfree(tr_entry);
577         }
578 }
579
/*
 * Release matching tag refs under utd_entry.
 * When full_tag has no accounting-tag component (acct_tag == 0) every
 * tag ref of this UID is a candidate; otherwise only the exact full_tag.
 * Refs still held by sock_tags survive (see free_tag_ref_from_utd_entry()).
 */
static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
{
	struct rb_node *node;
	struct tag_ref *tr_entry;
	tag_t acct_tag;

	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
		 full_tag, get_uid_from_tag(full_tag));
	acct_tag = get_atag_from_tag(full_tag);
	/* advance before a possible rb_erase() of the current node */
	node = rb_first(&utd_entry->tag_ref_tree);
	while (node) {
		tr_entry = rb_entry(node, struct tag_ref, tn.node);
		node = rb_next(node);
		if (!acct_tag || tr_entry->tn.tag == full_tag)
			free_tag_ref_from_utd_entry(tr_entry, utd_entry);
	}
}
597
598 static ssize_t read_proc_u64(struct file *file, char __user *buf,
599                          size_t size, loff_t *ppos)
600 {
601         uint64_t *valuep = PDE_DATA(file_inode(file));
602         char tmp[24];
603         size_t tmp_size;
604
605         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
606         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
607 }
608
609 static ssize_t read_proc_bool(struct file *file, char __user *buf,
610                           size_t size, loff_t *ppos)
611 {
612         bool *valuep = PDE_DATA(file_inode(file));
613         char tmp[24];
614         size_t tmp_size;
615
616         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
617         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
618 }
619
620 static int get_active_counter_set(tag_t tag)
621 {
622         int active_set = 0;
623         struct tag_counter_set *tcs;
624
625         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
626                  " (uid=%u)\n",
627                  tag, get_uid_from_tag(tag));
628         /* For now we only handle UID tags for active sets */
629         tag = get_utag_from_tag(tag);
630         spin_lock_bh(&tag_counter_set_list_lock);
631         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
632         if (tcs)
633                 active_set = tcs->active_set;
634         spin_unlock_bh(&tag_counter_set_list_lock);
635         return active_set;
636 }
637
638 /*
639  * Find the entry for tracking the specified interface.
640  * Caller must hold iface_stat_list_lock
641  */
642 static struct iface_stat *get_iface_entry(const char *ifname)
643 {
644         struct iface_stat *iface_entry;
645
646         /* Find the entry for tracking the specified tag within the interface */
647         if (ifname == NULL) {
648                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
649                 return NULL;
650         }
651
652         /* Iterate over interfaces */
653         list_for_each_entry(iface_entry, &iface_stat_list, list) {
654                 if (!strcmp(ifname, iface_entry->ifname))
655                         goto done;
656         }
657         iface_entry = NULL;
658 done:
659         return iface_entry;
660 }
661
/*
 * This is for fmt2 only: emit the column header line.
 * Field order must match pp_iface_stat_line().
 */
static void pp_iface_stat_header(struct seq_file *m)
{
	seq_puts(m, "ifname ");
	seq_puts(m, "total_skb_rx_bytes total_skb_rx_packets ");
	seq_puts(m, "total_skb_tx_bytes total_skb_tx_packets ");
	seq_puts(m, "rx_tcp_bytes rx_tcp_packets ");
	seq_puts(m, "rx_udp_bytes rx_udp_packets ");
	seq_puts(m, "rx_other_bytes rx_other_packets ");
	seq_puts(m, "tx_tcp_bytes tx_tcp_packets ");
	seq_puts(m, "tx_udp_bytes tx_udp_packets ");
	seq_puts(m, "tx_other_bytes tx_other_packets\n");
}
677
/*
 * Emit one fmt2 data line of skb-based totals for iface_entry.
 * Field order must match pp_iface_stat_header().
 */
static void pp_iface_stat_line(struct seq_file *m,
			       struct iface_stat *iface_entry)
{
	struct data_counters *cnts;
	int cnt_set = 0;   /* We only use one set for the device */
	cnts = &iface_entry->totals_via_skb;
	seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
		   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
		   iface_entry->ifname,
		   dc_sum_bytes(cnts, cnt_set, IFS_RX),
		   dc_sum_packets(cnts, cnt_set, IFS_RX),
		   dc_sum_bytes(cnts, cnt_set, IFS_TX),
		   dc_sum_packets(cnts, cnt_set, IFS_TX),
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
}
704
/* Per-open seq_file state: which output format (1 or 2) to emit. */
struct proc_iface_stat_fmt_info {
	int fmt;
};
708
/*
 * seq_file start: begin iteration over iface_stat_list.
 * Acquires iface_stat_list_lock; it stays held across the whole
 * start/next/stop sequence — including the early NULL return when the
 * module is passive — and is released in iface_stat_fmt_proc_stop().
 */
static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	loff_t n = *pos;

	/*
	 * This lock will prevent iface_stat_update() from changing active,
	 * and in turn prevent an interface from unregistering itself.
	 */
	spin_lock_bh(&iface_stat_list_lock);

	if (unlikely(module_passive))
		return NULL;

	/* header only once, at the start of a fmt2 read */
	if (!n && p->fmt == 2)
		pp_iface_stat_header(m);

	return seq_list_start(&iface_stat_list, n);
}
728
/* seq_file next: advance to the following tracked interface. */
static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &iface_stat_list, pos);
}
733
/* seq_file stop: release the lock taken in iface_stat_fmt_proc_start(). */
static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
{
	spin_unlock_bh(&iface_stat_list_lock);
}
738
/*
 * seq_file show: print one interface's stats in the format selected at
 * open time (fmt 1: dev-based totals + live device stats; fmt 2: skb-based
 * totals via pp_iface_stat_line()). Inactive interfaces report zeroed
 * device stats since their net_dev is gone.
 */
static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	struct iface_stat *iface_entry;
	struct rtnl_link_stats64 dev_stats, *stats;
	struct rtnl_link_stats64 no_dev_stats = {0};


	CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));

	iface_entry = list_entry(v, struct iface_stat, list);

	/* net_dev is only valid while the entry is active */
	if (iface_entry->active) {
		stats = dev_get_stats(iface_entry->net_dev,
				      &dev_stats);
	} else {
		stats = &no_dev_stats;
	}
	/*
	 * If the meaning of the data changes, then update the fmtX
	 * string.
	 */
	if (p->fmt == 1) {
		seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
			   iface_entry->ifname,
			   iface_entry->active,
			   iface_entry->totals_via_dev[IFS_RX].bytes,
			   iface_entry->totals_via_dev[IFS_RX].packets,
			   iface_entry->totals_via_dev[IFS_TX].bytes,
			   iface_entry->totals_via_dev[IFS_TX].packets,
			   stats->rx_bytes, stats->rx_packets,
			   stats->tx_bytes, stats->tx_packets
			   );
	} else {
		pp_iface_stat_line(m, iface_entry);
	}
	return 0;
}
778
/* Read-only proc fops for the per-iface u64 counter files. */
static const struct file_operations read_u64_fops = {
	.read           = read_proc_u64,
	.llseek         = default_llseek,
};
783
/* Read-only proc fops for the per-iface "active" bool file. */
static const struct file_operations read_bool_fops = {
	.read           = read_proc_bool,
	.llseek         = default_llseek,
};
788
/*
 * Workqueue callback: create /proc/net/xt_qtaguid/iface_stat/<ifname>/
 * and its counter files for a newly tracked interface.
 * Runs from a workqueue so proc creation happens outside the atomic
 * context that queued it. Frees the work item in every path.
 */
static void iface_create_proc_worker(struct work_struct *work)
{
	struct proc_dir_entry *proc_entry;
	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
						   iface_work);
	struct iface_stat *new_iface  = isw->iface_entry;

	/* iface_entries are not deleted, so safe to manipulate. */
	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
	if (IS_ERR_OR_NULL(proc_entry)) {
		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
		kfree(isw);
		return;
	}

	new_iface->proc_ptr = proc_entry;

	/* each file exposes one counter via PDE_DATA (see read_proc_u64) */
	proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].bytes);
	proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].bytes);
	proc_create_data("tx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].packets);
	proc_create_data("rx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].packets);
	proc_create_data("active", proc_iface_perms, proc_entry,
			 &read_bool_fops, &new_iface->active);

	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
	kfree(isw);
}
825
826 /*
827  * Will set the entry's active state, and
828  * update the net_dev accordingly also.
829  */
830 static void _iface_stat_set_active(struct iface_stat *entry,
831                                    struct net_device *net_dev,
832                                    bool activate)
833 {
834         if (activate) {
835                 entry->net_dev = net_dev;
836                 entry->active = true;
837                 IF_DEBUG("qtaguid: %s(%s): "
838                          "enable tracking. rfcnt=%d\n", __func__,
839                          entry->ifname,
840                          __this_cpu_read(*net_dev->pcpu_refcnt));
841         } else {
842                 entry->active = false;
843                 entry->net_dev = NULL;
844                 IF_DEBUG("qtaguid: %s(%s): "
845                          "disable tracking. rfcnt=%d\n", __func__,
846                          entry->ifname,
847                          __this_cpu_read(*net_dev->pcpu_refcnt));
848
849         }
850 }
851
852 /* Caller must hold iface_stat_list_lock */
853 static struct iface_stat *iface_alloc(struct net_device *net_dev)
854 {
855         struct iface_stat *new_iface;
856         struct iface_stat_work *isw;
857
858         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
859         if (new_iface == NULL) {
860                 pr_err("qtaguid: iface_stat: create(%s): "
861                        "iface_stat alloc failed\n", net_dev->name);
862                 return NULL;
863         }
864         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
865         if (new_iface->ifname == NULL) {
866                 pr_err("qtaguid: iface_stat: create(%s): "
867                        "ifname alloc failed\n", net_dev->name);
868                 kfree(new_iface);
869                 return NULL;
870         }
871         spin_lock_init(&new_iface->tag_stat_list_lock);
872         new_iface->tag_stat_tree = RB_ROOT;
873         _iface_stat_set_active(new_iface, net_dev, true);
874
875         /*
876          * ipv6 notifier chains are atomic :(
877          * No create_proc_read_entry() for you!
878          */
879         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
880         if (!isw) {
881                 pr_err("qtaguid: iface_stat: create(%s): "
882                        "work alloc failed\n", new_iface->ifname);
883                 _iface_stat_set_active(new_iface, net_dev, false);
884                 kfree(new_iface->ifname);
885                 kfree(new_iface);
886                 return NULL;
887         }
888         isw->iface_entry = new_iface;
889         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
890         schedule_work(&isw->iface_work);
891         list_add(&new_iface->list, &iface_stat_list);
892         return new_iface;
893 }
894
895 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
896                                                struct iface_stat *iface)
897 {
898         struct rtnl_link_stats64 dev_stats, *stats;
899         bool stats_rewound;
900
901         stats = dev_get_stats(net_dev, &dev_stats);
902         /* No empty packets */
903         stats_rewound =
904                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
905                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
906
907         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
908                  "bytes rx/tx=%llu/%llu "
909                  "active=%d last_known=%d "
910                  "stats_rewound=%d\n", __func__,
911                  net_dev ? net_dev->name : "?",
912                  iface, net_dev,
913                  stats->rx_bytes, stats->tx_bytes,
914                  iface->active, iface->last_known_valid, stats_rewound);
915
916         if (iface->active && iface->last_known_valid && stats_rewound) {
917                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
918                              "iface reset its stats unexpectedly\n", __func__,
919                              net_dev->name);
920
921                 iface->totals_via_dev[IFS_TX].bytes +=
922                         iface->last_known[IFS_TX].bytes;
923                 iface->totals_via_dev[IFS_TX].packets +=
924                         iface->last_known[IFS_TX].packets;
925                 iface->totals_via_dev[IFS_RX].bytes +=
926                         iface->last_known[IFS_RX].bytes;
927                 iface->totals_via_dev[IFS_RX].packets +=
928                         iface->last_known[IFS_RX].packets;
929                 iface->last_known_valid = false;
930                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
931                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
932                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
933                          iface->last_known[IFS_TX].bytes);
934         }
935 }
936
937 /*
938  * Create a new entry for tracking the specified interface.
939  * Do nothing if the entry already exists.
940  * Called when an interface is configured with a valid IP address.
941  */
942 static void iface_stat_create(struct net_device *net_dev,
943                               struct in_ifaddr *ifa)
944 {
945         struct in_device *in_dev = NULL;
946         const char *ifname;
947         struct iface_stat *entry;
948         __be32 ipaddr = 0;
949         struct iface_stat *new_iface;
950
951         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
952                  net_dev ? net_dev->name : "?",
953                  ifa, net_dev);
954         if (!net_dev) {
955                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
956                 return;
957         }
958
959         ifname = net_dev->name;
960         if (!ifa) {
961                 in_dev = in_dev_get(net_dev);
962                 if (!in_dev) {
963                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
964                                ifname);
965                         return;
966                 }
967                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
968                          ifname, in_dev);
969                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
970                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
971                                  "ifa=%p ifa_label=%s\n",
972                                  ifname, ifa,
973                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
974                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
975                                 break;
976                 }
977         }
978
979         if (!ifa) {
980                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
981                          ifname);
982                 goto done_put;
983         }
984         ipaddr = ifa->ifa_local;
985
986         spin_lock_bh(&iface_stat_list_lock);
987         entry = get_iface_entry(ifname);
988         if (entry != NULL) {
989                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
990                          ifname, entry);
991                 iface_check_stats_reset_and_adjust(net_dev, entry);
992                 _iface_stat_set_active(entry, net_dev, true);
993                 IF_DEBUG("qtaguid: %s(%s): "
994                          "tracking now %d on ip=%pI4\n", __func__,
995                          entry->ifname, true, &ipaddr);
996                 goto done_unlock_put;
997         }
998
999         new_iface = iface_alloc(net_dev);
1000         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1001                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1002 done_unlock_put:
1003         spin_unlock_bh(&iface_stat_list_lock);
1004 done_put:
1005         if (in_dev)
1006                 in_dev_put(in_dev);
1007 }
1008
/*
 * IPv6 counterpart of iface_stat_create(): create or re-activate the
 * iface_stat entry for net_dev when an IPv6 address event arrives.
 */
static void iface_stat_create_ipv6(struct net_device *net_dev,
                                   struct inet6_ifaddr *ifa)
{
        struct in_device *in_dev;
        const char *ifname;
        struct iface_stat *entry;
        struct iface_stat *new_iface;
        int addr_type;

        IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
                 ifa, net_dev, net_dev ? net_dev->name : "");
        if (!net_dev) {
                pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
                return;
        }
        ifname = net_dev->name;

        /*
         * NOTE(review): this grabs the IPv4 in_device even though this is
         * the IPv6 path; it is only used for the debug print and held
         * across the lookup below — confirm this is intentional.
         */
        in_dev = in_dev_get(net_dev);
        if (!in_dev) {
                pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
                       ifname);
                return;
        }

        IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
                 ifname, in_dev);

        if (!ifa) {
                IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
                         ifname);
                goto done_put;
        }
        /* addr_type is computed but only the address itself is logged. */
        addr_type = ipv6_addr_type(&ifa->addr);

        spin_lock_bh(&iface_stat_list_lock);
        entry = get_iface_entry(ifname);
        if (entry != NULL) {
                /* Already tracked: re-activate and absorb any stat reset. */
                IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
                         ifname, entry);
                iface_check_stats_reset_and_adjust(net_dev, entry);
                _iface_stat_set_active(entry, net_dev, true);
                IF_DEBUG("qtaguid: %s(%s): "
                         "tracking now %d on ip=%pI6c\n", __func__,
                         entry->ifname, true, &ifa->addr);
                goto done_unlock_put;
        }

        new_iface = iface_alloc(net_dev);
        IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
                 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);

done_unlock_put:
        spin_unlock_bh(&iface_stat_list_lock);
done_put:
        in_dev_put(in_dev);
}
1065
1066 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1067 {
1068         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1069         return sock_tag_tree_search(&sock_tag_tree, sk);
1070 }
1071
1072 static struct sock_tag *get_sock_stat(const struct sock *sk)
1073 {
1074         struct sock_tag *sock_tag_entry;
1075         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1076         if (!sk)
1077                 return NULL;
1078         spin_lock_bh(&sock_tag_list_lock);
1079         sock_tag_entry = get_sock_stat_nl(sk);
1080         spin_unlock_bh(&sock_tag_list_lock);
1081         return sock_tag_entry;
1082 }
1083
1084 static int ipx_proto(const struct sk_buff *skb,
1085                      struct xt_action_param *par)
1086 {
1087         int thoff = 0, tproto;
1088
1089         switch (par->family) {
1090         case NFPROTO_IPV6:
1091                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1092                 if (tproto < 0)
1093                         MT_DEBUG("%s(): transport header not found in ipv6"
1094                                  " skb=%p\n", __func__, skb);
1095                 break;
1096         case NFPROTO_IPV4:
1097                 tproto = ip_hdr(skb)->protocol;
1098                 break;
1099         default:
1100                 tproto = IPPROTO_RAW;
1101         }
1102         return tproto;
1103 }
1104
1105 static void
1106 data_counters_update(struct data_counters *dc, int set,
1107                      enum ifs_tx_rx direction, int proto, int bytes)
1108 {
1109         switch (proto) {
1110         case IPPROTO_TCP:
1111                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1112                 break;
1113         case IPPROTO_UDP:
1114                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1115                 break;
1116         case IPPROTO_IP:
1117         default:
1118                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1119                                     1);
1120                 break;
1121         }
1122 }
1123
1124 /*
1125  * Update stats for the specified interface. Do nothing if the entry
1126  * does not exist (when a device was never configured with an IP address).
1127  * Called when an device is being unregistered.
1128  */
1129 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1130 {
1131         struct rtnl_link_stats64 dev_stats, *stats;
1132         struct iface_stat *entry;
1133
1134         stats = dev_get_stats(net_dev, &dev_stats);
1135         spin_lock_bh(&iface_stat_list_lock);
1136         entry = get_iface_entry(net_dev->name);
1137         if (entry == NULL) {
1138                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1139                          net_dev->name);
1140                 spin_unlock_bh(&iface_stat_list_lock);
1141                 return;
1142         }
1143
1144         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1145                  net_dev->name, entry);
1146         if (!entry->active) {
1147                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1148                          net_dev->name);
1149                 spin_unlock_bh(&iface_stat_list_lock);
1150                 return;
1151         }
1152
1153         if (stash_only) {
1154                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1155                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1156                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1157                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1158                 entry->last_known_valid = true;
1159                 IF_DEBUG("qtaguid: %s(%s): "
1160                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1161                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1162                 spin_unlock_bh(&iface_stat_list_lock);
1163                 return;
1164         }
1165         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1166         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1167         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1168         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1169         /* We don't need the last_known[] anymore */
1170         entry->last_known_valid = false;
1171         _iface_stat_set_active(entry, net_dev, false);
1172         IF_DEBUG("qtaguid: %s(%s): "
1173                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1174                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1175         spin_unlock_bh(&iface_stat_list_lock);
1176 }
1177
1178 /*
1179  * Update stats for the specified interface from the skb.
1180  * Do nothing if the entry
1181  * does not exist (when a device was never configured with an IP address).
1182  * Called on each sk.
1183  */
1184 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1185                                        struct xt_action_param *par)
1186 {
1187         struct iface_stat *entry;
1188         const struct net_device *el_dev;
1189         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1190         int bytes = skb->len;
1191         int proto;
1192
1193         if (!skb->dev) {
1194                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1195                 el_dev = par->in ? : par->out;
1196         } else {
1197                 const struct net_device *other_dev;
1198                 el_dev = skb->dev;
1199                 other_dev = par->in ? : par->out;
1200                 if (el_dev != other_dev) {
1201                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1202                                  "par->(in/out)=%p %s\n",
1203                                  par->hooknum, el_dev, el_dev->name, other_dev,
1204                                  other_dev->name);
1205                 }
1206         }
1207
1208         if (unlikely(!el_dev)) {
1209                 pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
1210                                    par->hooknum, __func__);
1211                 BUG();
1212         } else if (unlikely(!el_dev->name)) {
1213                 pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
1214                                    par->hooknum, __func__);
1215                 BUG();
1216         } else {
1217                 proto = ipx_proto(skb, par);
1218                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1219                          par->hooknum, el_dev->name, el_dev->type,
1220                          par->family, proto);
1221         }
1222
1223         spin_lock_bh(&iface_stat_list_lock);
1224         entry = get_iface_entry(el_dev->name);
1225         if (entry == NULL) {
1226                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1227                          __func__, el_dev->name);
1228                 spin_unlock_bh(&iface_stat_list_lock);
1229                 return;
1230         }
1231
1232         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1233                  el_dev->name, entry);
1234
1235         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1236                              bytes);
1237         spin_unlock_bh(&iface_stat_list_lock);
1238 }
1239
1240 static void tag_stat_update(struct tag_stat *tag_entry,
1241                         enum ifs_tx_rx direction, int proto, int bytes)
1242 {
1243         int active_set;
1244         active_set = get_active_counter_set(tag_entry->tn.tag);
1245         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1246                  "dir=%d proto=%d bytes=%d)\n",
1247                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1248                  active_set, direction, proto, bytes);
1249         data_counters_update(&tag_entry->counters, active_set, direction,
1250                              proto, bytes);
1251         if (tag_entry->parent_counters)
1252                 data_counters_update(tag_entry->parent_counters, active_set,
1253                                      direction, proto, bytes);
1254 }
1255
1256 /*
1257  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1258  * the interface.
1259  * iface_entry->tag_stat_list_lock should be held.
1260  */
1261 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1262                                            tag_t tag)
1263 {
1264         struct tag_stat *new_tag_stat_entry = NULL;
1265         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1266                  " (uid=%u)\n", __func__,
1267                  iface_entry, tag, get_uid_from_tag(tag));
1268         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1269         if (!new_tag_stat_entry) {
1270                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1271                 goto done;
1272         }
1273         new_tag_stat_entry->tn.tag = tag;
1274         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1275 done:
1276         return new_tag_stat_entry;
1277 }
1278
/*
 * Account traffic for a socket on interface `ifname` against the
 * {acct_tag, uid_tag} pair derived from the socket's tag (or, for an
 * untagged socket, the plain uid).  Creates the per-interface tag_stat
 * entries on first use: a {0, uid} parent first, then the
 * {acct_tag, uid} child pointing at the parent's counters.
 */
static void if_tag_stat_update(const char *ifname, uid_t uid,
                               const struct sock *sk, enum ifs_tx_rx direction,
                               int proto, int bytes)
{
        struct tag_stat *tag_stat_entry;
        tag_t tag, acct_tag;
        tag_t uid_tag;
        struct data_counters *uid_tag_counters;
        struct sock_tag *sock_tag_entry;
        struct iface_stat *iface_entry;
        struct tag_stat *new_tag_stat = NULL;
        MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
                "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
                 ifname, uid, sk, direction, proto, bytes);


        /*
         * NOTE(review): lookup is done without iface_stat_list_lock;
         * iface_entries are never deleted (see iface_create_proc_worker),
         * but confirm this is safe against concurrent list_add.
         */
        iface_entry = get_iface_entry(ifname);
        if (!iface_entry) {
                pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
                                   "%s not found\n", ifname);
                return;
        }
        /* It is ok to process data when an iface_entry is inactive */

        MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
                 ifname, iface_entry);

        /*
         * Look for a tagged sock.
         * It will have an acct_uid.
         */
        sock_tag_entry = get_sock_stat(sk);
        if (sock_tag_entry) {
                tag = sock_tag_entry->tag;
                acct_tag = get_atag_from_tag(tag);
                uid_tag = get_utag_from_tag(tag);
        } else {
                /* Untagged socket: account under {acct_tag=0, uid}. */
                acct_tag = make_atag_from_value(0);
                tag = combine_atag_with_uid(acct_tag, uid);
                uid_tag = make_tag_from_uid(uid);
        }
        MT_DEBUG("qtaguid: iface_stat: stat_update(): "
                 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
                 tag, get_uid_from_tag(tag), iface_entry);
        /* Loop over tag list under this interface for {acct_tag,uid_tag} */
        spin_lock_bh(&iface_entry->tag_stat_list_lock);

        tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
                                              tag);
        if (tag_stat_entry) {
                /*
                 * Updating the {acct_tag, uid_tag} entry handles both stats:
                 * {0, uid_tag} will also get updated.
                 */
                tag_stat_update(tag_stat_entry, direction, proto, bytes);
                spin_unlock_bh(&iface_entry->tag_stat_list_lock);
                return;
        }

        /* Loop over tag list under this interface for {0,uid_tag} */
        tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
                                              uid_tag);
        if (!tag_stat_entry) {
                /* Here: the base uid_tag did not exist */
                /*
                 * No parent counters. So
                 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
                 */
                new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
                if (!new_tag_stat)
                        goto unlock;
                uid_tag_counters = &new_tag_stat->counters;
        } else {
                uid_tag_counters = &tag_stat_entry->counters;
        }

        if (acct_tag) {
                /* Create the child {acct_tag, uid_tag} and hook up parent. */
                new_tag_stat = create_if_tag_stat(iface_entry, tag);
                if (!new_tag_stat)
                        goto unlock;
                new_tag_stat->parent_counters = uid_tag_counters;
        } else {
                /*
                 * For new_tag_stat to be still NULL here would require:
                 *  {0, uid_tag} exists
                 *  and {acct_tag, uid_tag} doesn't exist
                 *  AND acct_tag == 0.
                 * Impossible. This reassures us that new_tag_stat
                 * below will always be assigned.
                 */
                BUG_ON(!new_tag_stat);
        }
        tag_stat_update(new_tag_stat, direction, proto, bytes);
unlock:
        spin_unlock_bh(&iface_entry->tag_stat_list_lock);
}
1376
1377 static int iface_netdev_event_handler(struct notifier_block *nb,
1378                                       unsigned long event, void *ptr) {
1379         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1380
1381         if (unlikely(module_passive))
1382                 return NOTIFY_DONE;
1383
1384         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1385                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1386                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1387
1388         switch (event) {
1389         case NETDEV_UP:
1390                 iface_stat_create(dev, NULL);
1391                 atomic64_inc(&qtu_events.iface_events);
1392                 break;
1393         case NETDEV_DOWN:
1394         case NETDEV_UNREGISTER:
1395                 iface_stat_update(dev, event == NETDEV_DOWN);
1396                 atomic64_inc(&qtu_events.iface_events);
1397                 break;
1398         }
1399         return NOTIFY_DONE;
1400 }
1401
1402 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1403                                          unsigned long event, void *ptr)
1404 {
1405         struct inet6_ifaddr *ifa = ptr;
1406         struct net_device *dev;
1407
1408         if (unlikely(module_passive))
1409                 return NOTIFY_DONE;
1410
1411         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1412                  "ev=0x%lx/%s ifa=%p\n",
1413                  event, netdev_evt_str(event), ifa);
1414
1415         switch (event) {
1416         case NETDEV_UP:
1417                 BUG_ON(!ifa || !ifa->idev);
1418                 dev = (struct net_device *)ifa->idev->dev;
1419                 iface_stat_create_ipv6(dev, ifa);
1420                 atomic64_inc(&qtu_events.iface_events);
1421                 break;
1422         case NETDEV_DOWN:
1423         case NETDEV_UNREGISTER:
1424                 BUG_ON(!ifa || !ifa->idev);
1425                 dev = (struct net_device *)ifa->idev->dev;
1426                 iface_stat_update(dev, event == NETDEV_DOWN);
1427                 atomic64_inc(&qtu_events.iface_events);
1428                 break;
1429         }
1430         return NOTIFY_DONE;
1431 }
1432
1433 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1434                                         unsigned long event, void *ptr)
1435 {
1436         struct in_ifaddr *ifa = ptr;
1437         struct net_device *dev;
1438
1439         if (unlikely(module_passive))
1440                 return NOTIFY_DONE;
1441
1442         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1443                  "ev=0x%lx/%s ifa=%p\n",
1444                  event, netdev_evt_str(event), ifa);
1445
1446         switch (event) {
1447         case NETDEV_UP:
1448                 BUG_ON(!ifa || !ifa->ifa_dev);
1449                 dev = ifa->ifa_dev->dev;
1450                 iface_stat_create(dev, ifa);
1451                 atomic64_inc(&qtu_events.iface_events);
1452                 break;
1453         case NETDEV_DOWN:
1454         case NETDEV_UNREGISTER:
1455                 BUG_ON(!ifa || !ifa->ifa_dev);
1456                 dev = ifa->ifa_dev->dev;
1457                 iface_stat_update(dev, event == NETDEV_DOWN);
1458                 atomic64_inc(&qtu_events.iface_events);
1459                 break;
1460         }
1461         return NOTIFY_DONE;
1462 }
1463
/* Hooked into the netdevice notifier chain in iface_stat_init(). */
static struct notifier_block iface_netdev_notifier_blk = {
        .notifier_call = iface_netdev_event_handler,
};
1467
/* Hooked into the IPv4 inetaddr notifier chain in iface_stat_init(). */
static struct notifier_block iface_inetaddr_notifier_blk = {
        .notifier_call = iface_inetaddr_event_handler,
};
1471
/* Hooked into the IPv6 inet6addr notifier chain in iface_stat_init(). */
static struct notifier_block iface_inet6addr_notifier_blk = {
        .notifier_call = iface_inet6addr_event_handler,
};
1475
/*
 * seq_file iteration ops shared by the iface_stat "fmt" proc files;
 * the output format variant is selected per-open via PDE_DATA (see
 * proc_iface_stat_fmt_open()).
 */
static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
        .start  = iface_stat_fmt_proc_start,
        .next   = iface_stat_fmt_proc_next,
        .stop   = iface_stat_fmt_proc_stop,
        .show   = iface_stat_fmt_proc_show,
};
1482
1483 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1484 {
1485         struct proc_iface_stat_fmt_info *s;
1486
1487         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1488                         sizeof(struct proc_iface_stat_fmt_info));
1489         if (!s)
1490                 return -ENOMEM;
1491
1492         s->fmt = (uintptr_t)PDE_DATA(inode);
1493         return 0;
1494 }
1495
/* File ops for both iface_stat fmt proc files (seq_file based). */
static const struct file_operations proc_iface_stat_fmt_fops = {
        .open           = proc_iface_stat_fmt_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_private,
};
1502
1503 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1504 {
1505         int err;
1506
1507         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1508         if (!iface_stat_procdir) {
1509                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1510                 err = -1;
1511                 goto err;
1512         }
1513
1514         iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1515                                                    proc_iface_perms,
1516                                                    parent_procdir,
1517                                                    &proc_iface_stat_fmt_fops,
1518                                                    (void *)1 /* fmt1 */);
1519         if (!iface_stat_all_procfile) {
1520                 pr_err("qtaguid: iface_stat: init "
1521                        " failed to create stat_old proc entry\n");
1522                 err = -1;
1523                 goto err_zap_entry;
1524         }
1525
1526         iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1527                                                    proc_iface_perms,
1528                                                    parent_procdir,
1529                                                    &proc_iface_stat_fmt_fops,
1530                                                    (void *)2 /* fmt2 */);
1531         if (!iface_stat_fmt_procfile) {
1532                 pr_err("qtaguid: iface_stat: init "
1533                        " failed to create stat_all proc entry\n");
1534                 err = -1;
1535                 goto err_zap_all_stats_entry;
1536         }
1537
1538
1539         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1540         if (err) {
1541                 pr_err("qtaguid: iface_stat: init "
1542                        "failed to register dev event handler\n");
1543                 goto err_zap_all_stats_entries;
1544         }
1545         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1546         if (err) {
1547                 pr_err("qtaguid: iface_stat: init "
1548                        "failed to register ipv4 dev event handler\n");
1549                 goto err_unreg_nd;
1550         }
1551
1552         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1553         if (err) {
1554                 pr_err("qtaguid: iface_stat: init "
1555                        "failed to register ipv6 dev event handler\n");
1556                 goto err_unreg_ip4_addr;
1557         }
1558         return 0;
1559
1560 err_unreg_ip4_addr:
1561         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1562 err_unreg_nd:
1563         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1564 err_zap_all_stats_entries:
1565         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1566 err_zap_all_stats_entry:
1567         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1568 err_zap_entry:
1569         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1570 err:
1571         return err;
1572 }
1573
1574 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1575                                     struct xt_action_param *par)
1576 {
1577         struct sock *sk;
1578         unsigned int hook_mask = (1 << par->hooknum);
1579
1580         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1581                  par->hooknum, par->family);
1582
1583         /*
1584          * Let's not abuse the the xt_socket_get*_sk(), or else it will
1585          * return garbage SKs.
1586          */
1587         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1588                 return NULL;
1589
1590         switch (par->family) {
1591         case NFPROTO_IPV6:
1592                 sk = xt_socket_lookup_slow_v6(skb, par->in);
1593                 break;
1594         case NFPROTO_IPV4:
1595                 sk = xt_socket_lookup_slow_v4(skb, par->in);
1596                 break;
1597         default:
1598                 return NULL;
1599         }
1600
1601         if (sk) {
1602                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1603                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1604                 /*
1605                  * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1606                  * "struct inet_timewait_sock" which is missing fields.
1607                  */
1608                 if (sk->sk_state  == TCP_TIME_WAIT) {
1609                         sock_gen_put(sk);
1610                         sk = NULL;
1611                 }
1612         }
1613         return sk;
1614 }
1615
1616 static void account_for_uid(const struct sk_buff *skb,
1617                             const struct sock *alternate_sk, uid_t uid,
1618                             struct xt_action_param *par)
1619 {
1620         const struct net_device *el_dev;
1621
1622         if (!skb->dev) {
1623                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1624                 el_dev = par->in ? : par->out;
1625         } else {
1626                 const struct net_device *other_dev;
1627                 el_dev = skb->dev;
1628                 other_dev = par->in ? : par->out;
1629                 if (el_dev != other_dev) {
1630                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1631                                 "par->(in/out)=%p %s\n",
1632                                 par->hooknum, el_dev, el_dev->name, other_dev,
1633                                 other_dev->name);
1634                 }
1635         }
1636
1637         if (unlikely(!el_dev)) {
1638                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1639         } else if (unlikely(!el_dev->name)) {
1640                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1641         } else {
1642                 int proto = ipx_proto(skb, par);
1643                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1644                          par->hooknum, el_dev->name, el_dev->type,
1645                          par->family, proto);
1646
1647                 if_tag_stat_update(el_dev->name, uid,
1648                                 skb->sk ? skb->sk : alternate_sk,
1649                                 par->in ? IFS_RX : IFS_TX,
1650                                 proto, skb->len);
1651         }
1652 }
1653
/*
 * The xt match entry point: decides whether a packet matches the
 * uid/gid/socket conditions of the iptables rule and, as a side
 * effect, feeds the per-interface / per-uid accounting.
 * Lock/ref discipline: a sock obtained from qtaguid_find_sk() is
 * released at put_sock_ret_res, as is sk_callback_lock if taken.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_qtaguid_match_info *info = par->matchinfo;
	const struct file *filp;
	bool got_sock = false;
	struct sock *sk;
	kuid_t sock_uid;
	bool res;
	bool set_sk_callback_lock = false;

	/* In passive mode do no work; only honor the invert flag. */
	if (unlikely(module_passive))
		return (info->match ^ info->invert) == 0;

	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
		 par->hooknum, skb, par->in, par->out, par->family);

	atomic64_inc(&qtu_events.match_calls);
	if (skb == NULL) {
		res = (info->match ^ info->invert) == 0;
		goto ret_res;
	}

	switch (par->hooknum) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		/* Pre/post routing only does interface-level accounting. */
		atomic64_inc(&qtu_events.match_calls_prepost);
		iface_stat_update_from_skb(skb, par);
		/*
		 * We are done in pre/post. The skb will get processed
		 * further later.
		 */
		res = (info->match ^ info->invert);
		goto ret_res;
		break;
	/* default: Fall through and do UID related work */
	}

	sk = skb->sk;
	/*
	 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
	 * "struct inet_timewait_sock" which is missing fields.
	 * So we ignore it.
	 */
	if (sk && sk->sk_state == TCP_TIME_WAIT)
		sk = NULL;
	if (sk == NULL) {
		/*
		 * A missing sk->sk_socket happens when packets are in-flight
		 * and the matching socket is already closed and gone.
		 */
		sk = qtaguid_find_sk(skb, par);
		/*
		 * If we got the socket from the find_sk(), we will need to put
		 * it back, as nf_tproxy_get_sock_v4() got it.
		 */
		got_sock = sk;
		if (sk)
			atomic64_inc(&qtu_events.match_found_sk_in_ct);
		else
			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
	} else {
		atomic64_inc(&qtu_events.match_found_sk);
	}
	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
	if (sk != NULL) {
		/* Pin sk_socket/file while we read the owning credentials. */
		set_sk_callback_lock = true;
		read_lock_bh(&sk->sk_callback_lock);
		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
			par->hooknum, sk, sk->sk_socket,
			sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
			par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
	}

	if (sk == NULL || sk->sk_socket == NULL) {
		/*
		 * Here, the qtaguid_find_sk() using connection tracking
		 * couldn't find the owner, so for now we just count them
		 * against the system.
		 */
		/*
		 * TODO: unhack how to force just accounting.
		 * For now we only do iface stats when the uid-owner is not
		 * requested.
		 */
		if (!(info->match & XT_QTAGUID_UID))
			account_for_uid(skb, sk, 0, par);
		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
			par->hooknum,
			sk ? sk->sk_socket : NULL);
		res = (info->match ^ info->invert) == 0;
		atomic64_inc(&qtu_events.match_no_sk);
		goto put_sock_ret_res;
	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
		/* A socket exists but the rule wants "no socket". */
		res = false;
		goto put_sock_ret_res;
	}
	filp = sk->sk_socket->file;
	if (filp == NULL) {
		MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
		account_for_uid(skb, sk, 0, par);
		res = ((info->match ^ info->invert) &
			(XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
		atomic64_inc(&qtu_events.match_no_sk_file);
		goto put_sock_ret_res;
	}
	sock_uid = filp->f_cred->fsuid;
	/*
	 * TODO: unhack how to force just accounting.
	 * For now we only do iface stats when the uid-owner is not requested
	 */
	if (!(info->match & XT_QTAGUID_UID))
		account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid), par);

	/*
	 * The following two tests fail the match when:
	 *    id not in range AND no inverted condition requested
	 * or id     in range AND    inverted condition requested
	 * Thus (!a && b) || (a && !b) == a ^ b
	 */
	if (info->match & XT_QTAGUID_UID) {
		kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
		kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);

		if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
		     uid_lte(filp->f_cred->fsuid, uid_max)) ^
		    !(info->invert & XT_QTAGUID_UID)) {
			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
				 par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	}
	if (info->match & XT_QTAGUID_GID) {
		kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
		kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);

		if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
				gid_lte(filp->f_cred->fsgid, gid_max)) ^
			!(info->invert & XT_QTAGUID_GID)) {
			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
				par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	}
	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
	res = true;

put_sock_ret_res:
	if (got_sock)
		sock_gen_put(sk);
	if (set_sk_callback_lock)
		read_unlock_bh(&sk->sk_callback_lock);
ret_res:
	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
	return res;
}
1814
#ifdef DDEBUG
/* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the module's full internal state (sock tags, uid tag data,
 * proc qtu data, iface stats) to the kernel log, prefixed by a
 * printf-style header built from @fmt.
 */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
{
	va_list args;
	char *fmt_buff;
	char *buff;

	/* Runtime-gated: only emit when DDEBUG_MASK is enabled. */
	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
		return;

	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	BUG_ON(!fmt_buff);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
			  fmt_buff, args);
	BUG_ON(!buff);
	pr_debug("%s", buff);
	kfree(fmt_buff);
	kfree(buff);
	va_end(args);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
	spin_unlock_bh(&sock_tag_list_lock);

	/* Lock order: sock_tag_list_lock before uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
/* No-op stub when DDEBUG is not defined. */
static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
#endif
1858
/*
 * Cursor state for the ctrl seq_file iterator: remembers which socket
 * (and seq position) the previous read stopped at so the next read can
 * resume without rescanning sock_tag_tree from the beginning.
 */
struct proc_ctrl_print_info {
	struct sock *sk; /* socket found by reading to sk_pos */
	loff_t sk_pos; /* seq position that sk corresponds to */
};
1863
1864 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1865 {
1866         struct proc_ctrl_print_info *pcpi = m->private;
1867         struct sock_tag *sock_tag_entry = v;
1868         struct rb_node *node;
1869
1870         (*pos)++;
1871
1872         if (!v || v  == SEQ_START_TOKEN)
1873                 return NULL;
1874
1875         node = rb_next(&sock_tag_entry->sock_node);
1876         if (!node) {
1877                 pcpi->sk = NULL;
1878                 sock_tag_entry = SEQ_START_TOKEN;
1879         } else {
1880                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1881                 pcpi->sk = sock_tag_entry->sk;
1882         }
1883         pcpi->sk_pos = *pos;
1884         return sock_tag_entry;
1885 }
1886
/*
 * seq_file .start: take sock_tag_list_lock (released in
 * qtaguid_ctrl_proc_stop()) and position the iterator, either at the
 * first tree entry or at the socket remembered from the previous read.
 */
static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_ctrl_print_info *pcpi = m->private;
	struct sock_tag *sock_tag_entry;
	struct rb_node *node;

	spin_lock_bh(&sock_tag_list_lock);

	/* Passive mode: iterate nothing; .stop still drops the lock. */
	if (unlikely(module_passive))
		return NULL;

	if (*pos == 0) {
		pcpi->sk_pos = 0;
		node = rb_first(&sock_tag_tree);
		if (!node) {
			/* Empty tree: only the events summary is shown. */
			pcpi->sk = NULL;
			return SEQ_START_TOKEN;
		}
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		pcpi->sk = sock_tag_entry->sk;
	} else {
		/* Resume from the remembered socket if it is still tagged. */
		sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
						NULL) ?: SEQ_START_TOKEN;
		if (*pos != pcpi->sk_pos) {
			/* seq_read skipped a next call */
			*pos = pcpi->sk_pos;
			return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
		}
	}
	return sock_tag_entry;
}
1918
/* seq_file .stop: drops the lock taken in qtaguid_ctrl_proc_start(). */
static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
{
	spin_unlock_bh(&sock_tag_list_lock);
}
1923
1924 /*
1925  * Procfs reader to get all active socket tags using style "1)" as described in
1926  * fs/proc/generic.c
1927  */
1928 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1929 {
1930         struct sock_tag *sock_tag_entry = v;
1931         uid_t uid;
1932         long f_count;
1933
1934         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1935                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
1936
1937         if (sock_tag_entry != SEQ_START_TOKEN) {
1938                 uid = get_uid_from_tag(sock_tag_entry->tag);
1939                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1940                          "pid=%u\n",
1941                          sock_tag_entry->sk,
1942                          sock_tag_entry->tag,
1943                          uid,
1944                          sock_tag_entry->pid
1945                         );
1946                 f_count = atomic_long_read(
1947                         &sock_tag_entry->socket->file->f_count);
1948                 seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
1949                            "f_count=%lu\n",
1950                            sock_tag_entry->sk,
1951                            sock_tag_entry->tag, uid,
1952                            sock_tag_entry->pid, f_count);
1953         } else {
1954                 seq_printf(m, "events: sockets_tagged=%llu "
1955                            "sockets_untagged=%llu "
1956                            "counter_set_changes=%llu "
1957                            "delete_cmds=%llu "
1958                            "iface_events=%llu "
1959                            "match_calls=%llu "
1960                            "match_calls_prepost=%llu "
1961                            "match_found_sk=%llu "
1962                            "match_found_sk_in_ct=%llu "
1963                            "match_found_no_sk_in_ct=%llu "
1964                            "match_no_sk=%llu "
1965                            "match_no_sk_file=%llu\n",
1966                            (u64)atomic64_read(&qtu_events.sockets_tagged),
1967                            (u64)atomic64_read(&qtu_events.sockets_untagged),
1968                            (u64)atomic64_read(&qtu_events.counter_set_changes),
1969                            (u64)atomic64_read(&qtu_events.delete_cmds),
1970                            (u64)atomic64_read(&qtu_events.iface_events),
1971                            (u64)atomic64_read(&qtu_events.match_calls),
1972                            (u64)atomic64_read(&qtu_events.match_calls_prepost),
1973                            (u64)atomic64_read(&qtu_events.match_found_sk),
1974                            (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
1975                            (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1976                            (u64)atomic64_read(&qtu_events.match_no_sk),
1977                            (u64)atomic64_read(&qtu_events.match_no_sk_file));
1978
1979                 /* Count the following as part of the last item_index */
1980                 prdebug_full_state(0, "proc ctrl");
1981         }
1982
1983         return 0;
1984 }
1985
1986 /*
1987  * Delete socket tags, and stat tags associated with a given
1988  * accouting tag and uid.
1989  */
1990 static int ctrl_cmd_delete(const char *input)
1991 {
1992         char cmd;
1993         int uid_int;
1994         kuid_t uid;
1995         uid_t entry_uid;
1996         tag_t acct_tag;
1997         tag_t tag;
1998         int res, argc;
1999         struct iface_stat *iface_entry;
2000         struct rb_node *node;
2001         struct sock_tag *st_entry;
2002         struct rb_root st_to_free_tree = RB_ROOT;
2003         struct tag_stat *ts_entry;
2004         struct tag_counter_set *tcs_entry;
2005         struct tag_ref *tr_entry;
2006         struct uid_tag_data *utd_entry;
2007
2008         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
2009         uid = make_kuid(&init_user_ns, uid_int);
2010         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2011                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2012                  acct_tag, uid_int);
2013         if (argc < 2) {
2014                 res = -EINVAL;
2015                 goto err;
2016         }
2017         if (!valid_atag(acct_tag)) {
2018                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2019                 res = -EINVAL;
2020                 goto err;
2021         }
2022         if (argc < 3) {
2023                 uid = current_fsuid();
2024                 uid_int = from_kuid(&init_user_ns, uid);
2025         } else if (!can_impersonate_uid(uid)) {
2026                 pr_info("qtaguid: ctrl_delete(%s): "
2027                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2028                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2029                 res = -EPERM;
2030                 goto err;
2031         }
2032
2033         tag = combine_atag_with_uid(acct_tag, uid_int);
2034         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2035                  "looking for tag=0x%llx (uid=%u)\n",
2036                  input, tag, uid_int);
2037
2038         /* Delete socket tags */
2039         spin_lock_bh(&sock_tag_list_lock);
2040         node = rb_first(&sock_tag_tree);
2041         while (node) {
2042                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2043                 entry_uid = get_uid_from_tag(st_entry->tag);
2044                 node = rb_next(node);
2045                 if (entry_uid != uid_int)
2046                         continue;
2047
2048                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2049                          input, st_entry->tag, entry_uid);
2050
2051                 if (!acct_tag || st_entry->tag == tag) {
2052                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2053                         /* Can't sockfd_put() within spinlock, do it later. */
2054                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2055                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2056                         BUG_ON(tr_entry->num_sock_tags <= 0);
2057                         tr_entry->num_sock_tags--;
2058                         /*
2059                          * TODO: remove if, and start failing.
2060                          * This is a hack to work around the fact that in some
2061                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2062                          * and are trying to work around apps
2063                          * that didn't open the /dev/xt_qtaguid.
2064                          */
2065                         if (st_entry->list.next && st_entry->list.prev)
2066                                 list_del(&st_entry->list);
2067                 }
2068         }
2069         spin_unlock_bh(&sock_tag_list_lock);
2070
2071         sock_tag_tree_erase(&st_to_free_tree);
2072
2073         /* Delete tag counter-sets */
2074         spin_lock_bh(&tag_counter_set_list_lock);
2075         /* Counter sets are only on the uid tag, not full tag */
2076         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2077         if (tcs_entry) {
2078                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2079                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2080                          input,
2081                          tcs_entry->tn.tag,
2082                          get_uid_from_tag(tcs_entry->tn.tag),
2083                          tcs_entry->active_set);
2084                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2085                 kfree(tcs_entry);
2086         }
2087         spin_unlock_bh(&tag_counter_set_list_lock);
2088
2089         /*
2090          * If acct_tag is 0, then all entries belonging to uid are
2091          * erased.
2092          */
2093         spin_lock_bh(&iface_stat_list_lock);
2094         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2095                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2096                 node = rb_first(&iface_entry->tag_stat_tree);
2097                 while (node) {
2098                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2099                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2100                         node = rb_next(node);
2101
2102                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2103                                  "ts tag=0x%llx (uid=%u)\n",
2104                                  input, ts_entry->tn.tag, entry_uid);
2105
2106                         if (entry_uid != uid_int)
2107                                 continue;
2108                         if (!acct_tag || ts_entry->tn.tag == tag) {
2109                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2110                                          "erase ts: %s 0x%llx %u\n",
2111                                          input, iface_entry->ifname,
2112                                          get_atag_from_tag(ts_entry->tn.tag),
2113                                          entry_uid);
2114                                 rb_erase(&ts_entry->tn.node,
2115                                          &iface_entry->tag_stat_tree);
2116                                 kfree(ts_entry);
2117                         }
2118                 }
2119                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2120         }
2121         spin_unlock_bh(&iface_stat_list_lock);
2122
2123         /* Cleanup the uid_tag_data */
2124         spin_lock_bh(&uid_tag_data_tree_lock);
2125         node = rb_first(&uid_tag_data_tree);
2126         while (node) {
2127                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2128                 entry_uid = utd_entry->uid;
2129                 node = rb_next(node);
2130
2131                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2132                          "utd uid=%u\n",
2133                          input, entry_uid);
2134
2135                 if (entry_uid != uid_int)
2136                         continue;
2137                 /*
2138                  * Go over the tag_refs, and those that don't have
2139                  * sock_tags using them are freed.
2140                  */
2141                 put_tag_ref_tree(tag, utd_entry);
2142                 put_utd_entry(utd_entry);
2143         }
2144         spin_unlock_bh(&uid_tag_data_tree_lock);
2145
2146         atomic64_inc(&qtu_events.delete_cmds);
2147         res = 0;
2148
2149 err:
2150         return res;
2151 }
2152
2153 static int ctrl_cmd_counter_set(const char *input)
2154 {
2155         char cmd;
2156         uid_t uid = 0;
2157         tag_t tag;
2158         int res, argc;
2159         struct tag_counter_set *tcs;
2160         int counter_set;
2161
2162         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2163         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2164                  "set=%d uid=%u\n", input, argc, cmd,
2165                  counter_set, uid);
2166         if (argc != 3) {
2167                 res = -EINVAL;
2168                 goto err;
2169         }
2170         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2171                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2172                         input);
2173                 res = -EINVAL;
2174                 goto err;
2175         }
2176         if (!can_manipulate_uids()) {
2177                 pr_info("qtaguid: ctrl_counterset(%s): "
2178                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2179                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2180                 res = -EPERM;
2181                 goto err;
2182         }
2183
2184         tag = make_tag_from_uid(uid);
2185         spin_lock_bh(&tag_counter_set_list_lock);
2186         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2187         if (!tcs) {
2188                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2189                 if (!tcs) {
2190                         spin_unlock_bh(&tag_counter_set_list_lock);
2191                         pr_err("qtaguid: ctrl_counterset(%s): "
2192                                "failed to alloc counter set\n",
2193                                input);
2194                         res = -ENOMEM;
2195                         goto err;
2196                 }
2197                 tcs->tn.tag = tag;
2198                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2199                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2200                          "(uid=%u) set=%d\n",
2201                          input, tag, get_uid_from_tag(tag), counter_set);
2202         }
2203         tcs->active_set = counter_set;
2204         spin_unlock_bh(&tag_counter_set_list_lock);
2205         atomic64_inc(&qtu_events.counter_set_changes);
2206         res = 0;
2207
2208 err:
2209         return res;
2210 }
2211
2212 static int ctrl_cmd_tag(const char *input)
2213 {
2214         char cmd;
2215         int sock_fd = 0;
2216         kuid_t uid;
2217         unsigned int uid_int = 0;
2218         tag_t acct_tag = make_atag_from_value(0);
2219         tag_t full_tag;
2220         struct socket *el_socket;
2221         int res, argc;
2222         struct sock_tag *sock_tag_entry;
2223         struct tag_ref *tag_ref_entry;
2224         struct uid_tag_data *uid_tag_data_entry;
2225         struct proc_qtu_data *pqd_entry;
2226
2227         /* Unassigned args will get defaulted later. */
2228         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
2229         uid = make_kuid(&init_user_ns, uid_int);
2230         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2231                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2232                  acct_tag, uid_int);
2233         if (argc < 2) {
2234                 res = -EINVAL;
2235                 goto err;
2236         }
2237         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2238         if (!el_socket) {
2239                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2240                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2241                         input, sock_fd, res, current->pid, current->tgid,
2242                         from_kuid(&init_user_ns, current_fsuid()));
2243                 goto err;
2244         }
2245         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2246                  input, atomic_long_read(&el_socket->file->f_count),
2247                  el_socket->sk);
2248         if (argc < 3) {
2249                 acct_tag = make_atag_from_value(0);
2250         } else if (!valid_atag(acct_tag)) {
2251                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2252                 res = -EINVAL;
2253                 goto err_put;
2254         }
2255         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2256                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2257                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2258                  input, current->pid, current->tgid,
2259                  from_kuid(&init_user_ns, current_uid()),
2260                  from_kuid(&init_user_ns, current_euid()),
2261                  from_kuid(&init_user_ns, current_fsuid()),
2262                  from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
2263                  in_group_p(xt_qtaguid_ctrl_file->gid),
2264                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2265         if (argc < 4) {
2266                 uid = current_fsuid();
2267                 uid_int = from_kuid(&init_user_ns, uid);
2268         } else if (!can_impersonate_uid(uid)) {
2269                 pr_info("qtaguid: ctrl_tag(%s): "
2270                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2271                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2272                 res = -EPERM;
2273                 goto err_put;
2274         }
2275         full_tag = combine_atag_with_uid(acct_tag, uid_int);
2276
2277         spin_lock_bh(&sock_tag_list_lock);
2278         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2279         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2280         if (IS_ERR(tag_ref_entry)) {
2281                 res = PTR_ERR(tag_ref_entry);
2282                 spin_unlock_bh(&sock_tag_list_lock);
2283                 goto err_put;
2284         }
2285         tag_ref_entry->num_sock_tags++;
2286         if (sock_tag_entry) {
2287                 struct tag_ref *prev_tag_ref_entry;
2288
2289                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2290                          "st@%p ...->f_count=%ld\n",
2291                          input, el_socket->sk, sock_tag_entry,
2292                          atomic_long_read(&el_socket->file->f_count));
2293                 /*
2294                  * This is a re-tagging, so release the sock_fd that was
2295                  * locked at the time of the 1st tagging.
2296                  * There is still the ref from this call's sockfd_lookup() so
2297                  * it can be done within the spinlock.
2298                  */
2299                 sockfd_put(sock_tag_entry->socket);
2300                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2301                                                     &uid_tag_data_entry);
2302                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2303                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2304                 prev_tag_ref_entry->num_sock_tags--;
2305                 sock_tag_entry->tag = full_tag;
2306         } else {
2307                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2308                          input, el_socket->sk);
2309                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2310                                          GFP_ATOMIC);
2311                 if (!sock_tag_entry) {
2312                         pr_err("qtaguid: ctrl_tag(%s): "
2313                                "socket tag alloc failed\n",
2314                                input);
2315                         spin_unlock_bh(&sock_tag_list_lock);
2316                         res = -ENOMEM;
2317                         goto err_tag_unref_put;
2318                 }
2319                 sock_tag_entry->sk = el_socket->sk;
2320                 sock_tag_entry->socket = el_socket;
2321                 sock_tag_entry->pid = current->tgid;
2322                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
2323                 spin_lock_bh(&uid_tag_data_tree_lock);
2324                 pqd_entry = proc_qtu_data_tree_search(
2325                         &proc_qtu_data_tree, current->tgid);
2326                 /*
2327                  * TODO: remove if, and start failing.
2328                  * At first, we want to catch user-space code that is not
2329                  * opening the /dev/xt_qtaguid.
2330                  */
2331                 if (IS_ERR_OR_NULL(pqd_entry))
2332                         pr_warn_once(
2333                                 "qtaguid: %s(): "
2334                                 "User space forgot to open /dev/xt_qtaguid? "
2335                                 "pid=%u tgid=%u uid=%u\n", __func__,
2336                                 current->pid, current->tgid,
2337                                 from_kuid(&init_user_ns, current_fsuid()));
2338                 else
2339                         list_add(&sock_tag_entry->list,
2340                                  &pqd_entry->sock_tag_list);
2341                 spin_unlock_bh(&uid_tag_data_tree_lock);
2342
2343                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2344                 atomic64_inc(&qtu_events.sockets_tagged);
2345         }
2346         spin_unlock_bh(&sock_tag_list_lock);
2347         /* We keep the ref to the socket (file) until it is untagged */
2348         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2349                  input, sock_tag_entry,
2350                  atomic_long_read(&el_socket->file->f_count));
2351         return 0;
2352
2353 err_tag_unref_put:
2354         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2355         tag_ref_entry->num_sock_tags--;
2356         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2357 err_put:
2358         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2359                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2360         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2361         sockfd_put(el_socket);
2362         return res;
2363
2364 err:
2365         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2366         return res;
2367 }
2368
/*
 * Handle the ctrl command "u <sock_fd>": remove the tag previously
 * attached (by ctrl_cmd_tag) to the socket referenced by sock_fd.
 * Drops both the ref taken at tag time and the ref taken by the
 * sockfd_lookup() done here.
 * Returns 0 on success, a negative errno otherwise.
 */
static int ctrl_cmd_untag(const char *input)
{
        char cmd;
        int sock_fd = 0;
        struct socket *el_socket;
        int res, argc;
        struct sock_tag *sock_tag_entry;
        struct tag_ref *tag_ref_entry;
        struct uid_tag_data *utd_entry;
        struct proc_qtu_data *pqd_entry;

        argc = sscanf(input, "%c %d", &cmd, &sock_fd);
        CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
                 input, argc, cmd, sock_fd);
        if (argc < 2) {
                res = -EINVAL;
                goto err;
        }
        el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
        if (!el_socket) {
                pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
                        " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
                        input, sock_fd, res, current->pid, current->tgid,
                        from_kuid(&init_user_ns, current_fsuid()));
                goto err;
        }
        CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
                 input, atomic_long_read(&el_socket->file->f_count),
                 el_socket->sk);
        spin_lock_bh(&sock_tag_list_lock);
        sock_tag_entry = get_sock_stat_nl(el_socket->sk);
        if (!sock_tag_entry) {
                /* Socket was never tagged (or was already untagged). */
                spin_unlock_bh(&sock_tag_list_lock);
                res = -EINVAL;
                goto err_put;
        }
        /*
         * The socket already belongs to the current process
         * so it can do whatever it wants to it.
         */
        rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

        tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
        BUG_ON(!tag_ref_entry);
        BUG_ON(tag_ref_entry->num_sock_tags <= 0);
        /* uid_tag_data_tree_lock nests inside sock_tag_list_lock. */
        spin_lock_bh(&uid_tag_data_tree_lock);
        pqd_entry = proc_qtu_data_tree_search(
                &proc_qtu_data_tree, current->tgid);
        /*
         * TODO: remove if, and start failing.
         * At first, we want to catch user-space code that is not
         * opening the /dev/xt_qtaguid.
         */
        if (IS_ERR_OR_NULL(pqd_entry))
                pr_warn_once("qtaguid: %s(): "
                             "User space forgot to open /dev/xt_qtaguid? "
                             "pid=%u tgid=%u uid=%u\n", __func__,
                             current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
        else
                list_del(&sock_tag_entry->list);
        spin_unlock_bh(&uid_tag_data_tree_lock);
        /*
         * We don't free tag_ref from the utd_entry here,
         * only during a cmd_delete().
         */
        tag_ref_entry->num_sock_tags--;
        spin_unlock_bh(&sock_tag_list_lock);
        /*
         * Release the sock_fd that was grabbed at tag time,
         * and once more for the sockfd_lookup() here.
         */
        sockfd_put(sock_tag_entry->socket);
        CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
                 input, sock_tag_entry,
                 atomic_long_read(&el_socket->file->f_count) - 1);
        sockfd_put(el_socket);

        kfree(sock_tag_entry);
        atomic64_inc(&qtu_events.sockets_untagged);

        return 0;

err_put:
        CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
                 input, atomic_long_read(&el_socket->file->f_count) - 1);
        /* Release the sock_fd that was grabbed by sockfd_lookup(). */
        sockfd_put(el_socket);
        return res;

err:
        CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
        return res;
}
2462
2463 static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
2464 {
2465         char cmd;
2466         ssize_t res;
2467
2468         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2469                  input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2470
2471         cmd = input[0];
2472         /* Collect params for commands */
2473         switch (cmd) {
2474         case 'd':
2475                 res = ctrl_cmd_delete(input);
2476                 break;
2477
2478         case 's':
2479                 res = ctrl_cmd_counter_set(input);
2480                 break;
2481
2482         case 't':
2483                 res = ctrl_cmd_tag(input);
2484                 break;
2485
2486         case 'u':
2487                 res = ctrl_cmd_untag(input);
2488                 break;
2489
2490         default:
2491                 res = -EINVAL;
2492                 goto err;
2493         }
2494         if (!res)
2495                 res = count;
2496 err:
2497         CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
2498         return res;
2499 }
2500
2501 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2502 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2503                                    size_t count, loff_t *offp)
2504 {
2505         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2506
2507         if (unlikely(module_passive))
2508                 return count;
2509
2510         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2511                 return -EINVAL;
2512
2513         if (copy_from_user(input_buf, buffer, count))
2514                 return -EFAULT;
2515
2516         input_buf[count] = '\0';
2517         return qtaguid_ctrl_parse(input_buf, count);
2518 }
2519
/* Per-reader iteration state for the /proc/net/xt_qtaguid/stats seq_file. */
struct proc_print_info {
        struct iface_stat *iface_entry; /* interface currently being dumped */
        int item_index;                 /* running "idx" column counter */
        tag_t tag; /* tag found by reading to tag_pos */
        off_t tag_pos;                  /* seq position at which tag was emitted */
        int tag_item_index;             /* item_index saved at tag_pos for resume */
};
2527
/* Emit the column-header line of the stats file. */
static void pp_stats_header(struct seq_file *m)
{
        static const char header[] =
                "idx iface acct_tag_hex uid_tag_int cnt_set "
                "rx_bytes rx_packets "
                "tx_bytes tx_packets "
                "rx_tcp_bytes rx_tcp_packets "
                "rx_udp_bytes rx_udp_packets "
                "rx_other_bytes rx_other_packets "
                "tx_tcp_bytes tx_tcp_packets "
                "tx_udp_bytes tx_udp_packets "
                "tx_other_bytes tx_other_packets\n";

        seq_puts(m, header);
}
2541
/*
 * Print one stats line for the given tag_stat and counter set.
 * Lines carrying a detailed acct_tag are suppressed (nothing printed,
 * returns 0) when the reader lacks permission to see other uids' stats.
 * Returns 1 when a line was emitted, 0 when skipped for permissions,
 * or seq_printf()'s negative result on seq buffer overflow.
 */
static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
                         int cnt_set)
{
        int ret;
        struct data_counters *cnts;
        tag_t tag = ts_entry->tn.tag;
        uid_t stat_uid = get_uid_from_tag(tag);
        struct proc_print_info *ppi = m->private;
        /* Detailed tags are not available to everybody */
        if (get_atag_from_tag(tag) && !can_read_other_uid_stats(
                                                make_kuid(&init_user_ns,stat_uid))) {
                CT_DEBUG("qtaguid: stats line: "
                         "%s 0x%llx %u: insufficient priv "
                         "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
                         ppi->iface_entry->ifname,
                         get_atag_from_tag(tag), stat_uid,
                         current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
                         from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid));
                return 0;
        }
        /* idx column only advances for lines actually shown. */
        ppi->item_index++;
        cnts = &ts_entry->counters;
        ret = seq_printf(m, "%d %s 0x%llx %u %u "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu "
                "%llu %llu\n",
                ppi->item_index,
                ppi->iface_entry->ifname,
                get_atag_from_tag(tag),
                stat_uid,
                cnt_set,
                dc_sum_bytes(cnts, cnt_set, IFS_RX),
                dc_sum_packets(cnts, cnt_set, IFS_RX),
                dc_sum_bytes(cnts, cnt_set, IFS_TX),
                dc_sum_packets(cnts, cnt_set, IFS_TX),
                cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
                cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
                cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
                cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
                cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
                cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
                cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
                cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
                cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
                cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
                cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
                cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
        /* seq_printf() returns 0 on success here; map success to 1. */
        return ret ?: 1;
}
2596
2597 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2598 {
2599         int ret;
2600         int counter_set;
2601         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2602              counter_set++) {
2603                 ret = pp_stats_line(m, ts_entry, counter_set);
2604                 if (ret < 0)
2605                         return false;
2606         }
2607         return true;
2608 }
2609
2610 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2611 {
2612         struct iface_stat *iface_entry;
2613
2614         if (!ptr)
2615                 return false;
2616
2617         list_for_each_entry(iface_entry, &iface_stat_list, list)
2618                 if (iface_entry == ptr)
2619                         return true;
2620         return false;
2621 }
2622
/*
 * Advance ppi->iface_entry to the next interface in iface_stat_list,
 * releasing the current interface's tag_stat_list_lock and taking the
 * next one's.  Sets ppi->iface_entry to NULL when the list is exhausted.
 * Caller holds iface_stat_list_lock and the current entry's
 * tag_stat_list_lock.
 */
static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
{
        spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
        list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
                /* Lock the new interface and stop at the first one found. */
                spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
                return;
        }
        ppi->iface_entry = NULL;
}
2632
/*
 * seq_file ->next: walk the rb-tree of tag_stat entries of the current
 * interface, moving on to the next interface's tree when one is
 * exhausted.  Records the tag reached and its position in ppi so that
 * ->start can resume after a buffer refill.
 * Returns the next tag_stat, or NULL at end of data.
 */
static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct proc_print_info *ppi = m->private;
        struct tag_stat *ts_entry;
        struct rb_node *node;

        if (!v) {
                pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
                return NULL;
        }

        (*pos)++;

        if (!ppi->iface_entry || unlikely(module_passive))
                return NULL;

        if (v == SEQ_START_TOKEN)
                node = rb_first(&ppi->iface_entry->tag_stat_tree);
        else
                node = rb_next(&((struct tag_stat *)v)->tn.node);

        /* Current tree exhausted: advance to the next non-empty iface. */
        while (!node) {
                qtaguid_stats_proc_next_iface_entry(ppi);
                if (!ppi->iface_entry)
                        return NULL;
                node = rb_first(&ppi->iface_entry->tag_stat_tree);
        }

        ts_entry = rb_entry(node, struct tag_stat, tn.node);
        /* Remember where we are so ->start can resume from here. */
        ppi->tag = ts_entry->tn.tag;
        ppi->tag_pos = *pos;
        ppi->tag_item_index = ppi->item_index;
        return ts_entry;
}
2667
/*
 * seq_file ->start: begin or resume a stats dump.
 * Takes iface_stat_list_lock (released in ->stop) and, once an
 * interface is selected, its tag_stat_list_lock.
 * On resume (*pos != 0) the cached iface_entry pointer is re-validated
 * and the last printed tag is looked up again, since entries may have
 * vanished between buffer refills.
 */
static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
{
        struct proc_print_info *ppi = m->private;
        struct tag_stat *ts_entry = NULL;

        spin_lock_bh(&iface_stat_list_lock);

        if (*pos == 0) {
                /* Fresh read: emit header token, start at the first iface. */
                ppi->item_index = 1;
                ppi->tag_pos = 0;
                if (list_empty(&iface_stat_list)) {
                        ppi->iface_entry = NULL;
                } else {
                        ppi->iface_entry = list_first_entry(&iface_stat_list,
                                                            struct iface_stat,
                                                            list);
                        spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
                }
                return SEQ_START_TOKEN;
        }
        if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
                if (ppi->iface_entry) {
                        pr_err("qtaguid: %s(): iface_entry %p not found\n",
                               __func__, ppi->iface_entry);
                        ppi->iface_entry = NULL;
                }
                return NULL;
        }

        spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);

        if (!ppi->tag_pos) {
                /* seq_read skipped first next call */
                ts_entry = SEQ_START_TOKEN;
        } else {
                ts_entry = tag_stat_tree_search(
                                &ppi->iface_entry->tag_stat_tree, ppi->tag);
                if (!ts_entry) {
                        /* ->stop still unlocks; iface_entry stays valid. */
                        pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
                                __func__, ppi->tag);
                        return NULL;
                }
        }

        if (*pos == ppi->tag_pos) { /* normal resume */
                ppi->item_index = ppi->tag_item_index;
        } else {
                /* seq_read skipped a next call */
                *pos = ppi->tag_pos;
                ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
        }

        return ts_entry;
}
2722
2723 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2724 {
2725         struct proc_print_info *ppi = m->private;
2726         if (ppi->iface_entry)
2727                 spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2728         spin_unlock_bh(&iface_stat_list_lock);
2729 }
2730
2731 /*
2732  * Procfs reader to get all tag stats using style "1)" as described in
2733  * fs/proc/generic.c
2734  * Groups all protocols tx/rx bytes.
2735  */
2736 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2737 {
2738         struct tag_stat *ts_entry = v;
2739
2740         if (v == SEQ_START_TOKEN)
2741                 pp_stats_header(m);
2742         else
2743                 pp_sets(m, ts_entry);
2744
2745         return 0;
2746 }
2747
2748 /*------------------------------------------*/
2749 static int qtudev_open(struct inode *inode, struct file *file)
2750 {
2751         struct uid_tag_data *utd_entry;
2752         struct proc_qtu_data  *pqd_entry;
2753         struct proc_qtu_data  *new_pqd_entry;
2754         int res;
2755         bool utd_entry_found;
2756
2757         if (unlikely(qtu_proc_handling_passive))
2758                 return 0;
2759
2760         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2761                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2762
2763         spin_lock_bh(&uid_tag_data_tree_lock);
2764
2765         /* Look for existing uid data, or alloc one. */
2766         utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
2767         if (IS_ERR_OR_NULL(utd_entry)) {
2768                 res = PTR_ERR(utd_entry);
2769                 goto err_unlock;
2770         }
2771
2772         /* Look for existing PID based proc_data */
2773         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2774                                               current->tgid);
2775         if (pqd_entry) {
2776                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2777                        "%s already opened\n",
2778                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2779                        QTU_DEV_NAME);
2780                 res = -EBUSY;
2781                 goto err_unlock_free_utd;
2782         }
2783
2784         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2785         if (!new_pqd_entry) {
2786                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2787                        "proc data alloc failed\n",
2788                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2789                 res = -ENOMEM;
2790                 goto err_unlock_free_utd;
2791         }
2792         new_pqd_entry->pid = current->tgid;
2793         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2794         new_pqd_entry->parent_tag_data = utd_entry;
2795         utd_entry->num_pqd++;
2796
2797         proc_qtu_data_tree_insert(new_pqd_entry,
2798                                   &proc_qtu_data_tree);
2799
2800         spin_unlock_bh(&uid_tag_data_tree_lock);
2801         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2802                  from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
2803         file->private_data = new_pqd_entry;
2804         return 0;
2805
2806 err_unlock_free_utd:
2807         if (!utd_entry_found) {
2808                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2809                 kfree(utd_entry);
2810         }
2811 err_unlock:
2812         spin_unlock_bh(&uid_tag_data_tree_lock);
2813         return res;
2814 }
2815
/*
 * release() handler for /dev/xt_qtaguid: undo everything the process
 * registered through this fd.  All sockets it tagged are untagged, the
 * tag refs dropped, and the per-process proc_qtu_data freed.  The
 * per-uid data is freed too once nothing references it.
 */
static int qtudev_release(struct inode *inode, struct file *file)
{
        struct proc_qtu_data  *pqd_entry = file->private_data;
        struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
        struct sock_tag *st_entry;
        struct rb_root st_to_free_tree = RB_ROOT;
        struct list_head *entry, *next;
        struct tag_ref *tr;

        if (unlikely(qtu_proc_handling_passive))
                return 0;

        /*
         * Do not trust the current->pid, it might just be a kworker cleaning
         * up after a dead proc.
         */
        DR_DEBUG("qtaguid: qtudev_release(): "
                 "pid=%u tgid=%u uid=%u "
                 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
                 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
                 pqd_entry, pqd_entry->pid, utd_entry,
                 utd_entry->num_active_tags);

        /* Lock order: sock_tag_list_lock outside uid_tag_data_tree_lock. */
        spin_lock_bh(&sock_tag_list_lock);
        spin_lock_bh(&uid_tag_data_tree_lock);

        list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
                st_entry = list_entry(entry, struct sock_tag, list);
                DR_DEBUG("qtaguid: %s(): "
                         "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
                         __func__,
                         st_entry, st_entry->sk,
                         current->pid, current->tgid,
                         pqd_entry->parent_tag_data->uid);

                /*
                 * The tag's uid may differ from the opener's uid, so look
                 * up the owning uid_tag_data from the tag itself.
                 */
                utd_entry = uid_tag_data_tree_search(
                        &uid_tag_data_tree,
                        get_uid_from_tag(st_entry->tag));
                BUG_ON(IS_ERR_OR_NULL(utd_entry));
                DR_DEBUG("qtaguid: %s(): "
                         "looking for tag=0x%llx in utd_entry=%p\n", __func__,
                         st_entry->tag, utd_entry);
                tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
                                         st_entry->tag);
                BUG_ON(!tr);
                BUG_ON(tr->num_sock_tags <= 0);
                tr->num_sock_tags--;
                free_tag_ref_from_utd_entry(tr, utd_entry);

                rb_erase(&st_entry->sock_node, &sock_tag_tree);
                list_del(&st_entry->list);
                /* Can't sockfd_put() within spinlock, do it later. */
                sock_tag_tree_insert(st_entry, &st_to_free_tree);

                /*
                 * Try to free the utd_entry if no other proc_qtu_data is
                 * using it (num_pqd is 0) and it doesn't have active tags
                 * (num_active_tags is 0).
                 */
                put_utd_entry(utd_entry);
        }

        rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
        BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
        pqd_entry->parent_tag_data->num_pqd--;
        put_utd_entry(pqd_entry->parent_tag_data);
        kfree(pqd_entry);
        file->private_data = NULL;

        spin_unlock_bh(&uid_tag_data_tree_lock);
        spin_unlock_bh(&sock_tag_list_lock);

        /* Now outside the spinlocks: drop the deferred socket refs. */
        sock_tag_tree_erase(&st_to_free_tree);

        prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
                           current->pid, current->tgid);
        return 0;
}
2895
2896 /*------------------------------------------*/
/* File operations for /dev/xt_qtaguid. */
static const struct file_operations qtudev_fops = {
        .owner = THIS_MODULE,
        .open = qtudev_open,
        .release = qtudev_release,
};

/* Misc char device through which processes register with this module. */
static struct miscdevice qtu_device = {
        .minor = MISC_DYNAMIC_MINOR,
        .name = QTU_DEV_NAME,
        .fops = &qtudev_fops,
        /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
};
2909
/* seq_file iterator callbacks for /proc/net/xt_qtaguid/ctrl. */
static const struct seq_operations proc_qtaguid_ctrl_seqops = {
        .start = qtaguid_ctrl_proc_start,
        .next = qtaguid_ctrl_proc_next,
        .stop = qtaguid_ctrl_proc_stop,
        .show = qtaguid_ctrl_proc_show,
};
2916
2917 static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
2918 {
2919         return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
2920                                 sizeof(struct proc_ctrl_print_info));
2921 }
2922
/* File operations for /proc/net/xt_qtaguid/ctrl (readable and writable). */
static const struct file_operations proc_qtaguid_ctrl_fops = {
        .open           = proc_qtaguid_ctrl_open,
        .read           = seq_read,
        .write          = qtaguid_ctrl_proc_write,
        .llseek         = seq_lseek,
        .release        = seq_release_private,
};
2930
/* seq_file iterator callbacks for /proc/net/xt_qtaguid/stats. */
static const struct seq_operations proc_qtaguid_stats_seqops = {
        .start = qtaguid_stats_proc_start,
        .next = qtaguid_stats_proc_next,
        .stop = qtaguid_stats_proc_stop,
        .show = qtaguid_stats_proc_show,
};
2937
2938 static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
2939 {
2940         return seq_open_private(file, &proc_qtaguid_stats_seqops,
2941                                 sizeof(struct proc_print_info));
2942 }
2943
/* File operations for /proc/net/xt_qtaguid/stats (read-only). */
static const struct file_operations proc_qtaguid_stats_fops = {
        .open           = proc_qtaguid_stats_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_private,
};
2950
2951 /*------------------------------------------*/
2952 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2953 {
2954         int ret;
2955         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2956         if (!*res_procdir) {
2957                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2958                 ret = -ENOMEM;
2959                 goto no_dir;
2960         }
2961
2962         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2963                                                 *res_procdir,
2964                                                 &proc_qtaguid_ctrl_fops,
2965                                                 NULL);
2966         if (!xt_qtaguid_ctrl_file) {
2967                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2968                         " file\n");
2969                 ret = -ENOMEM;
2970                 goto no_ctrl_entry;
2971         }
2972
2973         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2974                                                  *res_procdir,
2975                                                  &proc_qtaguid_stats_fops,
2976                                                  NULL);
2977         if (!xt_qtaguid_stats_file) {
2978                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2979                         "file\n");
2980                 ret = -ENOMEM;
2981                 goto no_stats_entry;
2982         }
2983         /*
2984          * TODO: add support counter hacking
2985          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2986          */
2987         return 0;
2988
2989 no_stats_entry:
2990         remove_proc_entry("ctrl", *res_procdir);
2991 no_ctrl_entry:
2992         remove_proc_entry("xt_qtaguid", NULL);
2993 no_dir:
2994         return ret;
2995 }
2996
static struct xt_match qtaguid_mt_reg __read_mostly = {
        /*
         * This module masquerades as the "owner" module so that iptables
         * tools can deal with it.
         */
        .name       = "owner",
        .revision   = 1,
        /* NFPROTO_UNSPEC: matches for both IPv4 and IPv6 tables. */
        .family     = NFPROTO_UNSPEC,
        .match      = qtaguid_mt,
        .matchsize  = sizeof(struct xt_qtaguid_match_info),
        .me         = THIS_MODULE,
};
3009
3010 static int __init qtaguid_mt_init(void)
3011 {
3012         if (qtaguid_proc_register(&xt_qtaguid_procdir)
3013             || iface_stat_init(xt_qtaguid_procdir)
3014             || xt_register_match(&qtaguid_mt_reg)
3015             || misc_register(&qtu_device))
3016                 return -1;
3017         return 0;
3018 }
3019
3020 /*
3021  * TODO: allow unloading of the module.
3022  * For now stats are permanent.
3023  * Kconfig forces'y/n' and never an 'm'.
3024  */
3025
3026 module_init(qtaguid_mt_init);
3027 MODULE_AUTHOR("jpa <jpa@google.com>");
3028 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3029 MODULE_LICENSE("GPL");
3030 MODULE_ALIAS("ipt_owner");
3031 MODULE_ALIAS("ip6t_owner");
3032 MODULE_ALIAS("ipt_qtaguid");
3033 MODULE_ALIAS("ip6t_qtaguid");