e2e7d54f9bb1874249eeb910159b9decea129cb9
[firefly-linux-kernel-4.4.55.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/miscdevice.h>
21 #include <linux/netfilter/x_tables.h>
22 #include <linux/netfilter/xt_qtaguid.h>
23 #include <linux/ratelimit.h>
24 #include <linux/seq_file.h>
25 #include <linux/skbuff.h>
26 #include <linux/workqueue.h>
27 #include <net/addrconf.h>
28 #include <net/sock.h>
29 #include <net/tcp.h>
30 #include <net/udp.h>
31
32 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
33 #include <linux/netfilter_ipv6/ip6_tables.h>
34 #endif
35
36 #include <linux/netfilter/xt_socket.h>
37 #include "xt_qtaguid_internal.h"
38 #include "xt_qtaguid_print.h"
39 #include "../../fs/proc/internal.h"
40
41 /*
42  * We only use the xt_socket funcs within a similar context to avoid unexpected
43  * return values.
44  */
45 #define XT_SOCKET_SUPPORTED_HOOKS \
46         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
47
48
static const char *module_procdirname = "xt_qtaguid";
static struct proc_dir_entry *xt_qtaguid_procdir;

/* Permissions for the per-interface counter files under iface_stat/. */
static unsigned int proc_iface_perms = S_IRUGO;
module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_stats_file;
/* Permissions for the "stats" proc file. */
static unsigned int proc_stats_perms = S_IRUGO;
module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_ctrl_file;

/* Everybody can write. But proc_ctrl_write_limited is true by default which
 * limits what can be controlled. See the can_*() functions.
 */
static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);

/* Limited by default, so the gid of the ctrl and stats proc entries
 * will limit what can be done. See the can_*() functions.
 */
static bool proc_stats_readall_limited = true;
static bool proc_ctrl_write_limited = true;

module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
		   S_IRUGO | S_IWUSR);
module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
		   S_IRUGO | S_IWUSR);

/*
 * Limit the number of active tags (via socket tags) for a given UID.
 * Multiple processes could share the UID.
 */
static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);

/*
 * After the kernel has initialized this module, it is still possible
 * to make it passive.
 * Setting passive to Y:
 *  - the iface stats handling will not act on notifications.
 *  - iptables matches will never match.
 *  - ctrl commands silently succeed.
 *  - stats are always empty.
 * This is mostly useful when a bug is suspected.
 */
static bool module_passive;
module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);

/*
 * Control how qtaguid data is tracked per proc/uid.
 * Setting tag_tracking_passive to Y:
 *  - don't create proc specific structs to track tags
 *  - don't check that active tag stats exceed some limits.
 *  - don't clean up socket tags on process exits.
 * This is mostly useful when a bug is suspected.
 */
static bool qtu_proc_handling_passive;
module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
		   S_IRUGO | S_IWUSR);

#define QTU_DEV_NAME "xt_qtaguid"

uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);

/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
/*
 * The iface_stat_all* will go away once userspace gets used to the new fields
 * that have a format line.
 */
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
static struct proc_dir_entry *iface_stat_fmt_procfile;


/* All tracked interfaces; protected by iface_stat_list_lock. */
static LIST_HEAD(iface_stat_list);
static DEFINE_SPINLOCK(iface_stat_list_lock);

/* Tagged sockets, keyed by struct sock pointer. */
static struct rb_root sock_tag_tree = RB_ROOT;
static DEFINE_SPINLOCK(sock_tag_list_lock);

/* Active counter-set selection, keyed by (uid-only) tag. */
static struct rb_root tag_counter_set_tree = RB_ROOT;
static DEFINE_SPINLOCK(tag_counter_set_list_lock);

/* Per-UID tag bookkeeping, keyed by uid. */
static struct rb_root uid_tag_data_tree = RB_ROOT;
static DEFINE_SPINLOCK(uid_tag_data_tree_lock);

static struct rb_root proc_qtu_data_tree = RB_ROOT;
/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */

static struct qtaguid_event_counts qtu_events;
/*----------------------------------------------*/
145 static bool can_manipulate_uids(void)
146 {
147         /* root pwnd */
148         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
149                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || unlikely(!proc_ctrl_write_limited)
150                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
151 }
152
153 static bool can_impersonate_uid(kuid_t uid)
154 {
155         return uid_eq(uid, current_fsuid()) || can_manipulate_uids();
156 }
157
158 static bool can_read_other_uid_stats(kuid_t uid)
159 {
160         /* root pwnd */
161         return in_egroup_p(xt_qtaguid_stats_file->gid)
162                 || unlikely(!from_kuid(&init_user_ns, current_fsuid())) || uid_eq(uid, current_fsuid())
163                 || unlikely(!proc_stats_readall_limited)
164                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
165 }
166
167 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
168                                   enum ifs_tx_rx direction,
169                                   enum ifs_proto ifs_proto,
170                                   int bytes,
171                                   int packets)
172 {
173         counters->bpc[set][direction][ifs_proto].bytes += bytes;
174         counters->bpc[set][direction][ifs_proto].packets += packets;
175 }
176
177 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
178 {
179         struct rb_node *node = root->rb_node;
180
181         while (node) {
182                 struct tag_node *data = rb_entry(node, struct tag_node, node);
183                 int result;
184                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
185                          " node=%p data=%p\n", tag, node, data);
186                 result = tag_compare(tag, data->tag);
187                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
188                          " data.tag=0x%llx (uid=%u) res=%d\n",
189                          tag, data->tag, get_uid_from_tag(data->tag), result);
190                 if (result < 0)
191                         node = node->rb_left;
192                 else if (result > 0)
193                         node = node->rb_right;
194                 else
195                         return data;
196         }
197         return NULL;
198 }
199
200 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
201 {
202         struct rb_node **new = &(root->rb_node), *parent = NULL;
203
204         /* Figure out where to put new node */
205         while (*new) {
206                 struct tag_node *this = rb_entry(*new, struct tag_node,
207                                                  node);
208                 int result = tag_compare(data->tag, this->tag);
209                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
210                          " (uid=%u)\n", __func__,
211                          this->tag,
212                          get_uid_from_tag(this->tag));
213                 parent = *new;
214                 if (result < 0)
215                         new = &((*new)->rb_left);
216                 else if (result > 0)
217                         new = &((*new)->rb_right);
218                 else
219                         BUG();
220         }
221
222         /* Add new node and rebalance tree. */
223         rb_link_node(&data->node, parent, new);
224         rb_insert_color(&data->node, root);
225 }
226
227 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
228 {
229         tag_node_tree_insert(&data->tn, root);
230 }
231
232 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
233 {
234         struct tag_node *node = tag_node_tree_search(root, tag);
235         if (!node)
236                 return NULL;
237         return rb_entry(&node->node, struct tag_stat, tn.node);
238 }
239
240 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
241                                         struct rb_root *root)
242 {
243         tag_node_tree_insert(&data->tn, root);
244 }
245
246 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
247                                                            tag_t tag)
248 {
249         struct tag_node *node = tag_node_tree_search(root, tag);
250         if (!node)
251                 return NULL;
252         return rb_entry(&node->node, struct tag_counter_set, tn.node);
253
254 }
255
256 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
257 {
258         tag_node_tree_insert(&data->tn, root);
259 }
260
261 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
262 {
263         struct tag_node *node = tag_node_tree_search(root, tag);
264         if (!node)
265                 return NULL;
266         return rb_entry(&node->node, struct tag_ref, tn.node);
267 }
268
269 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
270                                              const struct sock *sk)
271 {
272         struct rb_node *node = root->rb_node;
273
274         while (node) {
275                 struct sock_tag *data = rb_entry(node, struct sock_tag,
276                                                  sock_node);
277                 if (sk < data->sk)
278                         node = node->rb_left;
279                 else if (sk > data->sk)
280                         node = node->rb_right;
281                 else
282                         return data;
283         }
284         return NULL;
285 }
286
287 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
288 {
289         struct rb_node **new = &(root->rb_node), *parent = NULL;
290
291         /* Figure out where to put new node */
292         while (*new) {
293                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
294                                                  sock_node);
295                 parent = *new;
296                 if (data->sk < this->sk)
297                         new = &((*new)->rb_left);
298                 else if (data->sk > this->sk)
299                         new = &((*new)->rb_right);
300                 else
301                         BUG();
302         }
303
304         /* Add new node and rebalance tree. */
305         rb_link_node(&data->sock_node, parent, new);
306         rb_insert_color(&data->sock_node, root);
307 }
308
309 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
310 {
311         struct rb_node *node;
312         struct sock_tag *st_entry;
313
314         node = rb_first(st_to_free_tree);
315         while (node) {
316                 st_entry = rb_entry(node, struct sock_tag, sock_node);
317                 node = rb_next(node);
318                 CT_DEBUG("qtaguid: %s(): "
319                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
320                          st_entry->sk,
321                          st_entry->tag,
322                          get_uid_from_tag(st_entry->tag));
323                 rb_erase(&st_entry->sock_node, st_to_free_tree);
324                 sockfd_put(st_entry->socket);
325                 kfree(st_entry);
326         }
327 }
328
329 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
330                                                        const pid_t pid)
331 {
332         struct rb_node *node = root->rb_node;
333
334         while (node) {
335                 struct proc_qtu_data *data = rb_entry(node,
336                                                       struct proc_qtu_data,
337                                                       node);
338                 if (pid < data->pid)
339                         node = node->rb_left;
340                 else if (pid > data->pid)
341                         node = node->rb_right;
342                 else
343                         return data;
344         }
345         return NULL;
346 }
347
348 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
349                                       struct rb_root *root)
350 {
351         struct rb_node **new = &(root->rb_node), *parent = NULL;
352
353         /* Figure out where to put new node */
354         while (*new) {
355                 struct proc_qtu_data *this = rb_entry(*new,
356                                                       struct proc_qtu_data,
357                                                       node);
358                 parent = *new;
359                 if (data->pid < this->pid)
360                         new = &((*new)->rb_left);
361                 else if (data->pid > this->pid)
362                         new = &((*new)->rb_right);
363                 else
364                         BUG();
365         }
366
367         /* Add new node and rebalance tree. */
368         rb_link_node(&data->node, parent, new);
369         rb_insert_color(&data->node, root);
370 }
371
372 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
373                                      struct rb_root *root)
374 {
375         struct rb_node **new = &(root->rb_node), *parent = NULL;
376
377         /* Figure out where to put new node */
378         while (*new) {
379                 struct uid_tag_data *this = rb_entry(*new,
380                                                      struct uid_tag_data,
381                                                      node);
382                 parent = *new;
383                 if (data->uid < this->uid)
384                         new = &((*new)->rb_left);
385                 else if (data->uid > this->uid)
386                         new = &((*new)->rb_right);
387                 else
388                         BUG();
389         }
390
391         /* Add new node and rebalance tree. */
392         rb_link_node(&data->node, parent, new);
393         rb_insert_color(&data->node, root);
394 }
395
396 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
397                                                      uid_t uid)
398 {
399         struct rb_node *node = root->rb_node;
400
401         while (node) {
402                 struct uid_tag_data *data = rb_entry(node,
403                                                      struct uid_tag_data,
404                                                      node);
405                 if (uid < data->uid)
406                         node = node->rb_left;
407                 else if (uid > data->uid)
408                         node = node->rb_right;
409                 else
410                         return data;
411         }
412         return NULL;
413 }
414
415 /*
416  * Allocates a new uid_tag_data struct if needed.
417  * Returns a pointer to the found or allocated uid_tag_data.
418  * Returns a PTR_ERR on failures, and lock is not held.
419  * If found is not NULL:
420  *   sets *found to true if not allocated.
421  *   sets *found to false if allocated.
422  */
423 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
424 {
425         struct uid_tag_data *utd_entry;
426
427         /* Look for top level uid_tag_data for the UID */
428         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
429         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
430
431         if (found_res)
432                 *found_res = utd_entry;
433         if (utd_entry)
434                 return utd_entry;
435
436         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
437         if (!utd_entry) {
438                 pr_err("qtaguid: get_uid_data(%u): "
439                        "tag data alloc failed\n", uid);
440                 return ERR_PTR(-ENOMEM);
441         }
442
443         utd_entry->uid = uid;
444         utd_entry->tag_ref_tree = RB_ROOT;
445         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
446         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
447         return utd_entry;
448 }
449
450 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
451 static struct tag_ref *new_tag_ref(tag_t new_tag,
452                                    struct uid_tag_data *utd_entry)
453 {
454         struct tag_ref *tr_entry;
455         int res;
456
457         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
458                 pr_info("qtaguid: new_tag_ref(0x%llx): "
459                         "tag ref alloc quota exceeded. max=%d\n",
460                         new_tag, max_sock_tags);
461                 res = -EMFILE;
462                 goto err_res;
463
464         }
465
466         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
467         if (!tr_entry) {
468                 pr_err("qtaguid: new_tag_ref(0x%llx): "
469                        "tag ref alloc failed\n",
470                        new_tag);
471                 res = -ENOMEM;
472                 goto err_res;
473         }
474         tr_entry->tn.tag = new_tag;
475         /* tr_entry->num_sock_tags  handled by caller */
476         utd_entry->num_active_tags++;
477         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
478         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
479                  " inserted new tag ref %p\n",
480                  new_tag, tr_entry);
481         return tr_entry;
482
483 err_res:
484         return ERR_PTR(res);
485 }
486
/*
 * Look up the tag_ref for @full_tag under its owning UID's data.
 * When @utd_res is non-NULL it always receives the uid_tag_data -- or
 * the ERR_PTR propagated from get_uid_data() on failure.
 * Returns the tag_ref, or NULL both when no ref exists yet and when
 * get_uid_data() failed; callers distinguish the two via IS_ERR on
 * *utd_res (see get_tag_ref()).
 * NOTE(review): runs under uid_tag_data_tree_lock in get_tag_ref();
 * confirm any new caller holds it as well.
 */
static struct tag_ref *lookup_tag_ref(tag_t full_tag,
				      struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;
	bool found_utd;
	uid_t uid = get_uid_from_tag(full_tag);

	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
		 full_tag, uid);

	/* Find or create the per-UID container first. */
	utd_entry = get_uid_data(uid, &found_utd);
	if (IS_ERR_OR_NULL(utd_entry)) {
		if (utd_res)
			*utd_res = utd_entry;
		return NULL;
	}

	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}
512
/* Never returns NULL. Either PTR_ERR or a valid ptr.
 * Looks up -- or creates via new_tag_ref() -- the tag_ref for
 * @full_tag while holding uid_tag_data_tree_lock. When @utd_res is
 * non-NULL it receives the owning uid_tag_data; a failed uid lookup
 * is treated as fatal (BUG_ON).
 */
static struct tag_ref *get_tag_ref(tag_t full_tag,
				   struct uid_tag_data **utd_res)
{
	struct uid_tag_data *utd_entry;
	struct tag_ref *tr_entry;

	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
		 full_tag);
	spin_lock_bh(&uid_tag_data_tree_lock);
	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
	BUG_ON(IS_ERR_OR_NULL(utd_entry));
	/* NULL from lookup here means "no ref yet", not an error. */
	if (!tr_entry)
		tr_entry = new_tag_ref(full_tag, utd_entry);

	spin_unlock_bh(&uid_tag_data_tree_lock);
	if (utd_res)
		*utd_res = utd_entry;
	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
		 full_tag, utd_entry, tr_entry);
	return tr_entry;
}
535
/* Checks and maybe frees the UID Tag Data entry.
 * The entry is only freed once it holds no tag refs and no
 * proc_qtu_data references (num_pqd).
 * NOTE(review): manipulates uid_tag_data_tree -- callers presumably
 * hold uid_tag_data_tree_lock; confirm at call sites.
 */
static void put_utd_entry(struct uid_tag_data *utd_entry)
{
	/* Are we done with the UID tag data entry? */
	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
		!utd_entry->num_pqd) {
		DR_DEBUG("qtaguid: %s(): "
			 "erase utd_entry=%p uid=%u "
			 "by pid=%u tgid=%u uid=%u\n", __func__,
			 utd_entry, utd_entry->uid,
			 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
		/* An empty ref tree implies no active tags remain. */
		BUG_ON(utd_entry->num_active_tags);
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
		kfree(utd_entry);
	} else {
		DR_DEBUG("qtaguid: %s(): "
			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
			 __func__, utd_entry, utd_entry->num_active_tags,
			 utd_entry->num_pqd);
		/* If we kept it, something must still reference it. */
		BUG_ON(!(utd_entry->num_active_tags ||
			 utd_entry->num_pqd));
	}
}
559
560 /*
561  * If no sock_tags are using this tag_ref,
562  * decrements refcount of utd_entry, removes tr_entry
563  * from utd_entry->tag_ref_tree and frees.
564  */
565 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
566                                         struct uid_tag_data *utd_entry)
567 {
568         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
569                  tr_entry, tr_entry->tn.tag,
570                  get_uid_from_tag(tr_entry->tn.tag));
571         if (!tr_entry->num_sock_tags) {
572                 BUG_ON(!utd_entry->num_active_tags);
573                 utd_entry->num_active_tags--;
574                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
575                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
576                 kfree(tr_entry);
577         }
578 }
579
580 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
581 {
582         struct rb_node *node;
583         struct tag_ref *tr_entry;
584         tag_t acct_tag;
585
586         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
587                  full_tag, get_uid_from_tag(full_tag));
588         acct_tag = get_atag_from_tag(full_tag);
589         node = rb_first(&utd_entry->tag_ref_tree);
590         while (node) {
591                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
592                 node = rb_next(node);
593                 if (!acct_tag || tr_entry->tn.tag == full_tag)
594                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
595         }
596 }
597
598 static ssize_t read_proc_u64(struct file *file, char __user *buf,
599                          size_t size, loff_t *ppos)
600 {
601         uint64_t *valuep = PDE_DATA(file_inode(file));
602         char tmp[24];
603         size_t tmp_size;
604
605         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
606         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
607 }
608
609 static ssize_t read_proc_bool(struct file *file, char __user *buf,
610                           size_t size, loff_t *ppos)
611 {
612         bool *valuep = PDE_DATA(file_inode(file));
613         char tmp[24];
614         size_t tmp_size;
615
616         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
617         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
618 }
619
620 static int get_active_counter_set(tag_t tag)
621 {
622         int active_set = 0;
623         struct tag_counter_set *tcs;
624
625         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
626                  " (uid=%u)\n",
627                  tag, get_uid_from_tag(tag));
628         /* For now we only handle UID tags for active sets */
629         tag = get_utag_from_tag(tag);
630         spin_lock_bh(&tag_counter_set_list_lock);
631         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
632         if (tcs)
633                 active_set = tcs->active_set;
634         spin_unlock_bh(&tag_counter_set_list_lock);
635         return active_set;
636 }
637
638 /*
639  * Find the entry for tracking the specified interface.
640  * Caller must hold iface_stat_list_lock
641  */
642 static struct iface_stat *get_iface_entry(const char *ifname)
643 {
644         struct iface_stat *iface_entry;
645
646         /* Find the entry for tracking the specified tag within the interface */
647         if (ifname == NULL) {
648                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
649                 return NULL;
650         }
651
652         /* Iterate over interfaces */
653         list_for_each_entry(iface_entry, &iface_stat_list, list) {
654                 if (!strcmp(ifname, iface_entry->ifname))
655                         goto done;
656         }
657         iface_entry = NULL;
658 done:
659         return iface_entry;
660 }
661
/* This is for fmt2 only: emit the column header line. Must stay in
 * sync with the fields printed by pp_iface_stat_line().
 */
static void pp_iface_stat_header(struct seq_file *m)
{
	static const char fmt2_header[] =
		"ifname "
		"total_skb_rx_bytes total_skb_rx_packets "
		"total_skb_tx_bytes total_skb_tx_packets "
		"rx_tcp_bytes rx_tcp_packets "
		"rx_udp_bytes rx_udp_packets "
		"rx_other_bytes rx_other_packets "
		"tx_tcp_bytes tx_tcp_packets "
		"tx_udp_bytes tx_udp_packets "
		"tx_other_bytes tx_other_packets\n";

	seq_puts(m, fmt2_header);
}
677
/*
 * Emit one fmt2 data line for @iface_entry from its skb-accounted
 * totals (totals_via_skb), split by direction and protocol.
 * Field order must stay in sync with pp_iface_stat_header().
 */
static void pp_iface_stat_line(struct seq_file *m,
			       struct iface_stat *iface_entry)
{
	struct data_counters *cnts;
	int cnt_set = 0;   /* We only use one set for the device */
	cnts = &iface_entry->totals_via_skb;
	seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
		   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
		   iface_entry->ifname,
		   dc_sum_bytes(cnts, cnt_set, IFS_RX),
		   dc_sum_packets(cnts, cnt_set, IFS_RX),
		   dc_sum_bytes(cnts, cnt_set, IFS_TX),
		   dc_sum_packets(cnts, cnt_set, IFS_TX),
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
		   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
}
704
/* Per-open private state for the iface_stat seq files. */
struct proc_iface_stat_fmt_info {
	/* Output format selector: 1 = legacy line (see the
	 * iface_stat_all file name), 2 = line-with-header format
	 * (iface_stat_fmt) -- presumably set at open; confirm at the
	 * proc open handler.
	 */
	int fmt;
};
708
/*
 * seq_file ->start(): take the list lock and position the iterator.
 * The lock is released in ->stop(), which the seq_file core always
 * calls -- so the early NULL return below does not leak the lock.
 */
static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	loff_t n = *pos;

	/*
	 * This lock will prevent iface_stat_update() from changing active,
	 * and in turn prevent an interface from unregistering itself.
	 */
	spin_lock_bh(&iface_stat_list_lock);

	/* Passive mode: show nothing. */
	if (unlikely(module_passive))
		return NULL;

	/* Print the fmt2 header only once, at the start of the file. */
	if (!n && p->fmt == 2)
		pp_iface_stat_header(m);

	return seq_list_start(&iface_stat_list, n);
}
728
/* seq_file ->next(): advance to the next tracked interface. */
static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_list_next(p, &iface_stat_list, pos);
}
733
/* seq_file ->stop(): release the lock taken in ->start(). */
static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
{
	spin_unlock_bh(&iface_stat_list_lock);
}
738
/*
 * seq_file ->show(): print one iface_stat entry.
 * fmt 1 emits the legacy line including raw net_device stats; any
 * other fmt goes through pp_iface_stat_line(). Inactive interfaces
 * report zeroed device stats since their net_dev is gone.
 * Runs with iface_stat_list_lock held (taken in ->start()).
 */
static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
{
	struct proc_iface_stat_fmt_info *p = m->private;
	struct iface_stat *iface_entry;
	struct rtnl_link_stats64 dev_stats, *stats;
	struct rtnl_link_stats64 no_dev_stats = {0};


	CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));

	iface_entry = list_entry(v, struct iface_stat, list);

	/* Only query the device while it is still registered/active. */
	if (iface_entry->active) {
		stats = dev_get_stats(iface_entry->net_dev,
				      &dev_stats);
	} else {
		stats = &no_dev_stats;
	}
	/*
	 * If the meaning of the data changes, then update the fmtX
	 * string.
	 */
	if (p->fmt == 1) {
		seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
			   iface_entry->ifname,
			   iface_entry->active,
			   iface_entry->totals_via_dev[IFS_RX].bytes,
			   iface_entry->totals_via_dev[IFS_RX].packets,
			   iface_entry->totals_via_dev[IFS_TX].bytes,
			   iface_entry->totals_via_dev[IFS_TX].packets,
			   stats->rx_bytes, stats->rx_packets,
			   stats->tx_bytes, stats->tx_packets
			   );
	} else {
		pp_iface_stat_line(m, iface_entry);
	}
	return 0;
}
778
/* fops for the per-interface u64 counter files (see read_proc_u64). */
static const struct file_operations read_u64_fops = {
	.read		= read_proc_u64,
	.llseek		= default_llseek,
};
783
/* fops for the per-interface "active" bool file (see read_proc_bool). */
static const struct file_operations read_bool_fops = {
	.read		= read_proc_bool,
	.llseek		= default_llseek,
};
788
/*
 * Work handler: create /proc/.../iface_stat/<ifname>/ and the counter
 * files (tx/rx bytes/packets, active) for a newly tracked interface.
 * Frees the iface_stat_work wrapper on every path; the iface_stat
 * itself is kept (see comment below -- entries are never deleted).
 */
static void iface_create_proc_worker(struct work_struct *work)
{
	struct proc_dir_entry *proc_entry;
	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
						   iface_work);
	struct iface_stat *new_iface  = isw->iface_entry;

	/* iface_entries are not deleted, so safe to manipulate. */
	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
	if (IS_ERR_OR_NULL(proc_entry)) {
		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
		kfree(isw);
		return;
	}

	new_iface->proc_ptr = proc_entry;

	/* Each file exposes one counter via PDE data + read_u64_fops. */
	proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].bytes);
	proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].bytes);
	proc_create_data("tx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_TX].packets);
	proc_create_data("rx_packets", proc_iface_perms, proc_entry,
			 &read_u64_fops,
			 &new_iface->totals_via_dev[IFS_RX].packets);
	proc_create_data("active", proc_iface_perms, proc_entry,
			 &read_bool_fops, &new_iface->active);

	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
	kfree(isw);
}
825
826 /*
827  * Will set the entry's active state, and
828  * update the net_dev accordingly also.
829  */
830 static void _iface_stat_set_active(struct iface_stat *entry,
831                                    struct net_device *net_dev,
832                                    bool activate)
833 {
834         if (activate) {
835                 entry->net_dev = net_dev;
836                 entry->active = true;
837                 IF_DEBUG("qtaguid: %s(%s): "
838                          "enable tracking. rfcnt=%d\n", __func__,
839                          entry->ifname,
840                          __this_cpu_read(*net_dev->pcpu_refcnt));
841         } else {
842                 entry->active = false;
843                 entry->net_dev = NULL;
844                 IF_DEBUG("qtaguid: %s(%s): "
845                          "disable tracking. rfcnt=%d\n", __func__,
846                          entry->ifname,
847                          __this_cpu_read(*net_dev->pcpu_refcnt));
848
849         }
850 }
851
/*
 * Allocate a new iface_stat for @net_dev, mark it active, and add it to
 * iface_stat_list.  The proc entries are created from a scheduled work
 * item, and all allocations use GFP_ATOMIC, because this can be reached
 * from atomic notifier context (see the inline note below).
 * Returns NULL on any allocation failure.
 * Caller must hold iface_stat_list_lock.
 */
static struct iface_stat *iface_alloc(struct net_device *net_dev)
{
        struct iface_stat *new_iface;
        struct iface_stat_work *isw;

        new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
        if (new_iface == NULL) {
                pr_err("qtaguid: iface_stat: create(%s): "
                       "iface_stat alloc failed\n", net_dev->name);
                return NULL;
        }
        new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
        if (new_iface->ifname == NULL) {
                pr_err("qtaguid: iface_stat: create(%s): "
                       "ifname alloc failed\n", net_dev->name);
                kfree(new_iface);
                return NULL;
        }
        spin_lock_init(&new_iface->tag_stat_list_lock);
        new_iface->tag_stat_tree = RB_ROOT;
        _iface_stat_set_active(new_iface, net_dev, true);

        /*
         * ipv6 notifier chains are atomic :(
         * No create_proc_read_entry() for you!
         */
        isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
        if (!isw) {
                pr_err("qtaguid: iface_stat: create(%s): "
                       "work alloc failed\n", new_iface->ifname);
                /* Roll back the activation before freeing everything. */
                _iface_stat_set_active(new_iface, net_dev, false);
                kfree(new_iface->ifname);
                kfree(new_iface);
                return NULL;
        }
        isw->iface_entry = new_iface;
        INIT_WORK(&isw->iface_work, iface_create_proc_worker);
        schedule_work(&isw->iface_work);
        list_add(&new_iface->list, &iface_stat_list);
        return new_iface;
}
894
/*
 * Detect an interface whose device counters went backwards ("rewound"),
 * i.e. the current dev byte counts are below the values stashed in
 * last_known[].  If the iface is active and the stash is valid, fold
 * the stashed totals into totals_via_dev[] so previously accumulated
 * traffic is not lost, then invalidate the stash.
 */
static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
                                               struct iface_stat *iface)
{
        struct rtnl_link_stats64 dev_stats, *stats;
        bool stats_rewound;

        stats = dev_get_stats(net_dev, &dev_stats);
        /* No empty packets */
        stats_rewound =
                (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
                || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);

        IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
                 "bytes rx/tx=%llu/%llu "
                 "active=%d last_known=%d "
                 "stats_rewound=%d\n", __func__,
                 net_dev ? net_dev->name : "?",
                 iface, net_dev,
                 stats->rx_bytes, stats->tx_bytes,
                 iface->active, iface->last_known_valid, stats_rewound);

        if (iface->active && iface->last_known_valid && stats_rewound) {
                pr_warn_once("qtaguid: iface_stat: %s(%s): "
                             "iface reset its stats unexpectedly\n", __func__,
                             net_dev->name);

                /* Preserve what we had accounted before the reset. */
                iface->totals_via_dev[IFS_TX].bytes +=
                        iface->last_known[IFS_TX].bytes;
                iface->totals_via_dev[IFS_TX].packets +=
                        iface->last_known[IFS_TX].packets;
                iface->totals_via_dev[IFS_RX].bytes +=
                        iface->last_known[IFS_RX].bytes;
                iface->totals_via_dev[IFS_RX].packets +=
                        iface->last_known[IFS_RX].packets;
                iface->last_known_valid = false;
                IF_DEBUG("qtaguid: %s(%s): iface=%p "
                         "used last known bytes rx/tx=%llu/%llu\n", __func__,
                         iface->ifname, iface, iface->last_known[IFS_RX].bytes,
                         iface->last_known[IFS_TX].bytes);
        }
}
936
/*
 * Create a new entry for tracking the specified interface.
 * Do nothing if the entry already exists.
 * Called when an interface is configured with a valid IP address.
 * @ifa may be NULL (netdev NETDEV_UP path); in that case the device's
 * IPv4 address list is searched for an address whose label matches the
 * device name.
 */
static void iface_stat_create(struct net_device *net_dev,
                              struct in_ifaddr *ifa)
{
        struct in_device *in_dev = NULL;
        const char *ifname;
        struct iface_stat *entry;
        __be32 ipaddr = 0;
        struct iface_stat *new_iface;

        IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
                 net_dev ? net_dev->name : "?",
                 ifa, net_dev);
        if (!net_dev) {
                pr_err("qtaguid: iface_stat: create(): no net dev\n");
                return;
        }

        ifname = net_dev->name;
        if (!ifa) {
                in_dev = in_dev_get(net_dev);
                if (!in_dev) {
                        pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
                               ifname);
                        return;
                }
                IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
                         ifname, in_dev);
                /*
                 * NOTE(review): ifa_list is walked without explicitly
                 * taking RTNL or RCU here; presumably the notifier
                 * context provides the needed protection -- confirm.
                 */
                for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
                        IF_DEBUG("qtaguid: iface_stat: create(%s): "
                                 "ifa=%p ifa_label=%s\n",
                                 ifname, ifa,
                                 ifa->ifa_label ? ifa->ifa_label : "(null)");
                        if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
                                break;
                }
        }

        if (!ifa) {
                IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
                         ifname);
                goto done_put;
        }
        ipaddr = ifa->ifa_local;

        spin_lock_bh(&iface_stat_list_lock);
        entry = get_iface_entry(ifname);
        if (entry != NULL) {
                /* Already tracked: handle counter rewinds, re-activate. */
                IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
                         ifname, entry);
                iface_check_stats_reset_and_adjust(net_dev, entry);
                _iface_stat_set_active(entry, net_dev, true);
                IF_DEBUG("qtaguid: %s(%s): "
                         "tracking now %d on ip=%pI4\n", __func__,
                         entry->ifname, true, &ipaddr);
                goto done_unlock_put;
        }

        new_iface = iface_alloc(net_dev);
        IF_DEBUG("qtaguid: iface_stat: create(%s): done "
                 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
done_unlock_put:
        spin_unlock_bh(&iface_stat_list_lock);
done_put:
        if (in_dev)
                in_dev_put(in_dev);
}
1008
/*
 * IPv6 counterpart of iface_stat_create(): start (or resume) tracking
 * @net_dev when it gains an IPv6 address.
 * NOTE(review): this takes a reference on the *IPv4* in_device even
 * though this is the IPv6 path; it is only logged and then released --
 * confirm whether holding that reference is intentional.
 */
static void iface_stat_create_ipv6(struct net_device *net_dev,
                                   struct inet6_ifaddr *ifa)
{
        struct in_device *in_dev;
        const char *ifname;
        struct iface_stat *entry;
        struct iface_stat *new_iface;
        int addr_type;

        IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
                 ifa, net_dev, net_dev ? net_dev->name : "");
        if (!net_dev) {
                pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
                return;
        }
        ifname = net_dev->name;

        in_dev = in_dev_get(net_dev);
        if (!in_dev) {
                pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
                       ifname);
                return;
        }

        IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
                 ifname, in_dev);

        if (!ifa) {
                IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
                         ifname);
                goto done_put;
        }
        /* Computed but only implicitly used; kept for parity with upstream. */
        addr_type = ipv6_addr_type(&ifa->addr);

        spin_lock_bh(&iface_stat_list_lock);
        entry = get_iface_entry(ifname);
        if (entry != NULL) {
                /* Already tracked: handle counter rewinds, re-activate. */
                IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
                         ifname, entry);
                iface_check_stats_reset_and_adjust(net_dev, entry);
                _iface_stat_set_active(entry, net_dev, true);
                IF_DEBUG("qtaguid: %s(%s): "
                         "tracking now %d on ip=%pI6c\n", __func__,
                         entry->ifname, true, &ifa->addr);
                goto done_unlock_put;
        }

        new_iface = iface_alloc(net_dev);
        IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
                 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);

done_unlock_put:
        spin_unlock_bh(&iface_stat_list_lock);
done_put:
        in_dev_put(in_dev);
}
1065
/*
 * Look up the sock_tag entry for @sk in the global sock_tag_tree.
 * Unlocked variant: caller must hold sock_tag_list_lock (see
 * get_sock_stat()).
 */
static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
{
        MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
        return sock_tag_tree_search(&sock_tag_tree, sk);
}
1071
1072 static struct sock_tag *get_sock_stat(const struct sock *sk)
1073 {
1074         struct sock_tag *sock_tag_entry;
1075         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1076         if (!sk)
1077                 return NULL;
1078         spin_lock_bh(&sock_tag_list_lock);
1079         sock_tag_entry = get_sock_stat_nl(sk);
1080         spin_unlock_bh(&sock_tag_list_lock);
1081         return sock_tag_entry;
1082 }
1083
1084 static int ipx_proto(const struct sk_buff *skb,
1085                      struct xt_action_param *par)
1086 {
1087         int thoff = 0, tproto;
1088
1089         switch (par->family) {
1090         case NFPROTO_IPV6:
1091                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1092                 if (tproto < 0)
1093                         MT_DEBUG("%s(): transport header not found in ipv6"
1094                                  " skb=%p\n", __func__, skb);
1095                 break;
1096         case NFPROTO_IPV4:
1097                 tproto = ip_hdr(skb)->protocol;
1098                 break;
1099         default:
1100                 tproto = IPPROTO_RAW;
1101         }
1102         return tproto;
1103 }
1104
1105 static void
1106 data_counters_update(struct data_counters *dc, int set,
1107                      enum ifs_tx_rx direction, int proto, int bytes)
1108 {
1109         switch (proto) {
1110         case IPPROTO_TCP:
1111                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1112                 break;
1113         case IPPROTO_UDP:
1114                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1115                 break;
1116         case IPPROTO_IP:
1117         default:
1118                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1119                                     1);
1120                 break;
1121         }
1122 }
1123
/*
 * Update stats for the specified interface. Do nothing if the entry
 * does not exist (when a device was never configured with an IP address).
 * Called when a device is being unregistered or brought down.
 * @stash_only: when true (NETDEV_DOWN) the current dev counters are
 * only saved into last_known[] -- the device may come back up; when
 * false (NETDEV_UNREGISTER) they are folded into totals_via_dev[] and
 * tracking is disabled.
 */
static void iface_stat_update(struct net_device *net_dev, bool stash_only)
{
        struct rtnl_link_stats64 dev_stats, *stats;
        struct iface_stat *entry;

        stats = dev_get_stats(net_dev, &dev_stats);
        spin_lock_bh(&iface_stat_list_lock);
        entry = get_iface_entry(net_dev->name);
        if (entry == NULL) {
                IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
                         net_dev->name);
                spin_unlock_bh(&iface_stat_list_lock);
                return;
        }

        IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
                 net_dev->name, entry);
        if (!entry->active) {
                IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
                         net_dev->name);
                spin_unlock_bh(&iface_stat_list_lock);
                return;
        }

        if (stash_only) {
                entry->last_known[IFS_TX].bytes = stats->tx_bytes;
                entry->last_known[IFS_TX].packets = stats->tx_packets;
                entry->last_known[IFS_RX].bytes = stats->rx_bytes;
                entry->last_known[IFS_RX].packets = stats->rx_packets;
                entry->last_known_valid = true;
                IF_DEBUG("qtaguid: %s(%s): "
                         "dev stats stashed rx/tx=%llu/%llu\n", __func__,
                         net_dev->name, stats->rx_bytes, stats->tx_bytes);
                spin_unlock_bh(&iface_stat_list_lock);
                return;
        }
        entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
        entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
        entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
        entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
        /* We don't need the last_known[] anymore */
        entry->last_known_valid = false;
        _iface_stat_set_active(entry, net_dev, false);
        IF_DEBUG("qtaguid: %s(%s): "
                 "disable tracking. rx/tx=%llu/%llu\n", __func__,
                 net_dev->name, stats->rx_bytes, stats->tx_bytes);
        spin_unlock_bh(&iface_stat_list_lock);
}
1177
/*
 * Update stats for the specified interface from the skb.
 * Do nothing if the entry
 * does not exist (when a device was never configured with an IP address).
 * Called on each sk.
 * Accounts skb->len into totals_via_skb, bucketed by transport protocol
 * and direction (par->in set => RX, otherwise TX).
 */
static void iface_stat_update_from_skb(const struct sk_buff *skb,
                                       struct xt_action_param *par)
{
        struct iface_stat *entry;
        const struct net_device *el_dev;
        enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
        int bytes = skb->len;
        int proto;

        if (!skb->dev) {
                MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
                el_dev = par->in ? : par->out;
        } else {
                const struct net_device *other_dev;
                el_dev = skb->dev;
                other_dev = par->in ? : par->out;
                if (el_dev != other_dev) {
                        MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
                                 "par->(in/out)=%p %s\n",
                                 par->hooknum, el_dev, el_dev->name, other_dev,
                                 other_dev->name);
                }
        }

        if (unlikely(!el_dev)) {
                pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
                                   par->hooknum, __func__);
                BUG();
        } else if (unlikely(!el_dev->name)) {
                pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
                                   par->hooknum, __func__);
                BUG();
        } else {
                /* BUG() above does not return, so proto is always set here. */
                proto = ipx_proto(skb, par);
                MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
                         par->hooknum, el_dev->name, el_dev->type,
                         par->family, proto);
        }

        spin_lock_bh(&iface_stat_list_lock);
        entry = get_iface_entry(el_dev->name);
        if (entry == NULL) {
                IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
                         __func__, el_dev->name);
                spin_unlock_bh(&iface_stat_list_lock);
                return;
        }

        IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
                 el_dev->name, entry);

        data_counters_update(&entry->totals_via_skb, 0, direction, proto,
                             bytes);
        spin_unlock_bh(&iface_stat_list_lock);
}
1239
1240 static void tag_stat_update(struct tag_stat *tag_entry,
1241                         enum ifs_tx_rx direction, int proto, int bytes)
1242 {
1243         int active_set;
1244         active_set = get_active_counter_set(tag_entry->tn.tag);
1245         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1246                  "dir=%d proto=%d bytes=%d)\n",
1247                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1248                  active_set, direction, proto, bytes);
1249         data_counters_update(&tag_entry->counters, active_set, direction,
1250                              proto, bytes);
1251         if (tag_entry->parent_counters)
1252                 data_counters_update(tag_entry->parent_counters, active_set,
1253                                      direction, proto, bytes);
1254 }
1255
1256 /*
1257  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1258  * the interface.
1259  * iface_entry->tag_stat_list_lock should be held.
1260  */
1261 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1262                                            tag_t tag)
1263 {
1264         struct tag_stat *new_tag_stat_entry = NULL;
1265         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1266                  " (uid=%u)\n", __func__,
1267                  iface_entry, tag, get_uid_from_tag(tag));
1268         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1269         if (!new_tag_stat_entry) {
1270                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1271                 goto done;
1272         }
1273         new_tag_stat_entry->tn.tag = tag;
1274         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1275 done:
1276         return new_tag_stat_entry;
1277 }
1278
/*
 * Account @bytes of @proto traffic on @ifname against the tag attached
 * to @sk, or against the plain {0, uid} tag when the socket is untagged.
 * Creates the {0, uid_tag} and {acct_tag, uid_tag} tag_stat entries on
 * demand, wiring the child's parent_counters to the uid entry so both
 * are updated together by tag_stat_update().
 * Takes iface_stat_list_lock, then the iface's tag_stat_list_lock.
 */
static void if_tag_stat_update(const char *ifname, uid_t uid,
                               const struct sock *sk, enum ifs_tx_rx direction,
                               int proto, int bytes)
{
        struct tag_stat *tag_stat_entry;
        tag_t tag, acct_tag;
        tag_t uid_tag;
        struct data_counters *uid_tag_counters;
        struct sock_tag *sock_tag_entry;
        struct iface_stat *iface_entry;
        struct tag_stat *new_tag_stat = NULL;
        MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
                "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
                 ifname, uid, sk, direction, proto, bytes);

        spin_lock_bh(&iface_stat_list_lock);
        iface_entry = get_iface_entry(ifname);
        if (!iface_entry) {
                pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
                                   "%s not found\n", ifname);
                spin_unlock_bh(&iface_stat_list_lock);
                return;
        }
        /* It is ok to process data when an iface_entry is inactive */

        MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
                 ifname, iface_entry);

        /*
         * Look for a tagged sock.
         * It will have an acct_uid.
         */
        sock_tag_entry = get_sock_stat(sk);
        if (sock_tag_entry) {
                tag = sock_tag_entry->tag;
                acct_tag = get_atag_from_tag(tag);
                uid_tag = get_utag_from_tag(tag);
        } else {
                /* Untagged socket: account under the bare uid tag. */
                acct_tag = make_atag_from_value(0);
                tag = combine_atag_with_uid(acct_tag, uid);
                uid_tag = make_tag_from_uid(uid);
        }
        MT_DEBUG("qtaguid: iface_stat: stat_update(): "
                 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
                 tag, get_uid_from_tag(tag), iface_entry);
        /* Loop over tag list under this interface for {acct_tag,uid_tag} */
        spin_lock_bh(&iface_entry->tag_stat_list_lock);

        tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
                                              tag);
        if (tag_stat_entry) {
                /*
                 * Updating the {acct_tag, uid_tag} entry handles both stats:
                 * {0, uid_tag} will also get updated.
                 */
                tag_stat_update(tag_stat_entry, direction, proto, bytes);
                goto unlock;
        }

        /* Loop over tag list under this interface for {0,uid_tag} */
        tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
                                              uid_tag);
        if (!tag_stat_entry) {
                /* Here: the base uid_tag did not exist */
                /*
                 * No parent counters. So
                 *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
                 */
                new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
                if (!new_tag_stat)
                        goto unlock;
                uid_tag_counters = &new_tag_stat->counters;
        } else {
                uid_tag_counters = &tag_stat_entry->counters;
        }

        if (acct_tag) {
                /* Create the child {acct_tag, uid_tag} and hook up parent. */
                new_tag_stat = create_if_tag_stat(iface_entry, tag);
                if (!new_tag_stat)
                        goto unlock;
                new_tag_stat->parent_counters = uid_tag_counters;
        } else {
                /*
                 * For new_tag_stat to be still NULL here would require:
                 *  {0, uid_tag} exists
                 *  and {acct_tag, uid_tag} doesn't exist
                 *  AND acct_tag == 0.
                 * Impossible. This reassures us that new_tag_stat
                 * below will always be assigned.
                 */
                BUG_ON(!new_tag_stat);
        }
        tag_stat_update(new_tag_stat, direction, proto, bytes);
unlock:
        spin_unlock_bh(&iface_entry->tag_stat_list_lock);
        spin_unlock_bh(&iface_stat_list_lock);
}
1377
1378 static int iface_netdev_event_handler(struct notifier_block *nb,
1379                                       unsigned long event, void *ptr) {
1380         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1381
1382         if (unlikely(module_passive))
1383                 return NOTIFY_DONE;
1384
1385         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1386                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1387                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1388
1389         switch (event) {
1390         case NETDEV_UP:
1391                 iface_stat_create(dev, NULL);
1392                 atomic64_inc(&qtu_events.iface_events);
1393                 break;
1394         case NETDEV_DOWN:
1395         case NETDEV_UNREGISTER:
1396                 iface_stat_update(dev, event == NETDEV_DOWN);
1397                 atomic64_inc(&qtu_events.iface_events);
1398                 break;
1399         }
1400         return NOTIFY_DONE;
1401 }
1402
/*
 * IPv6 address notifier: create/refresh interface tracking on
 * NETDEV_UP, and stash (DOWN) or fold in (UNREGISTER) device stats
 * otherwise.  No-op when the module runs in passive mode.
 */
static int iface_inet6addr_event_handler(struct notifier_block *nb,
                                         unsigned long event, void *ptr)
{
        struct inet6_ifaddr *ifa = ptr;
        struct net_device *dev;

        if (unlikely(module_passive))
                return NOTIFY_DONE;

        IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
                 "ev=0x%lx/%s ifa=%p\n",
                 event, netdev_evt_str(event), ifa);

        switch (event) {
        case NETDEV_UP:
                BUG_ON(!ifa || !ifa->idev);
                dev = (struct net_device *)ifa->idev->dev;
                iface_stat_create_ipv6(dev, ifa);
                atomic64_inc(&qtu_events.iface_events);
                break;
        case NETDEV_DOWN:
        case NETDEV_UNREGISTER:
                BUG_ON(!ifa || !ifa->idev);
                dev = (struct net_device *)ifa->idev->dev;
                /* stash_only == true for DOWN: iface may come back up */
                iface_stat_update(dev, event == NETDEV_DOWN);
                atomic64_inc(&qtu_events.iface_events);
                break;
        }
        return NOTIFY_DONE;
}
1433
/*
 * IPv4 address notifier: create/refresh interface tracking on
 * NETDEV_UP, and stash (DOWN) or fold in (UNREGISTER) device stats
 * otherwise.  No-op when the module runs in passive mode.
 */
static int iface_inetaddr_event_handler(struct notifier_block *nb,
                                        unsigned long event, void *ptr)
{
        struct in_ifaddr *ifa = ptr;
        struct net_device *dev;

        if (unlikely(module_passive))
                return NOTIFY_DONE;

        IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
                 "ev=0x%lx/%s ifa=%p\n",
                 event, netdev_evt_str(event), ifa);

        switch (event) {
        case NETDEV_UP:
                BUG_ON(!ifa || !ifa->ifa_dev);
                dev = ifa->ifa_dev->dev;
                iface_stat_create(dev, ifa);
                atomic64_inc(&qtu_events.iface_events);
                break;
        case NETDEV_DOWN:
        case NETDEV_UNREGISTER:
                BUG_ON(!ifa || !ifa->ifa_dev);
                dev = ifa->ifa_dev->dev;
                /* stash_only == true for DOWN: iface may come back up */
                iface_stat_update(dev, event == NETDEV_DOWN);
                atomic64_inc(&qtu_events.iface_events);
                break;
        }
        return NOTIFY_DONE;
}
1464
/* Device up/down/unregister events. */
static struct notifier_block iface_netdev_notifier_blk = {
        .notifier_call = iface_netdev_event_handler,
};

/* IPv4 address add/remove events. */
static struct notifier_block iface_inetaddr_notifier_blk = {
        .notifier_call = iface_inetaddr_event_handler,
};

/* IPv6 address add/remove events. */
static struct notifier_block iface_inet6addr_notifier_blk = {
        .notifier_call = iface_inet6addr_event_handler,
};
1476
/* seq_file iteration ops shared by the iface_stat_all/iface_stat_fmt files. */
static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
        .start  = iface_stat_fmt_proc_start,
        .next   = iface_stat_fmt_proc_next,
        .stop   = iface_stat_fmt_proc_stop,
        .show   = iface_stat_fmt_proc_show,
};
1483
1484 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1485 {
1486         struct proc_iface_stat_fmt_info *s;
1487
1488         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1489                         sizeof(struct proc_iface_stat_fmt_info));
1490         if (!s)
1491                 return -ENOMEM;
1492
1493         s->fmt = (uintptr_t)PDE_DATA(inode);
1494         return 0;
1495 }
1496
/*
 * file_operations shared by iface_stat_all and iface_stat_fmt; the
 * format is selected via PDE data (see proc_iface_stat_fmt_open()).
 */
static const struct file_operations proc_iface_stat_fmt_fops = {
        .open           = proc_iface_stat_fmt_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_private,
};
1503
1504 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1505 {
1506         int err;
1507
1508         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1509         if (!iface_stat_procdir) {
1510                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1511                 err = -1;
1512                 goto err;
1513         }
1514
1515         iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1516                                                    proc_iface_perms,
1517                                                    parent_procdir,
1518                                                    &proc_iface_stat_fmt_fops,
1519                                                    (void *)1 /* fmt1 */);
1520         if (!iface_stat_all_procfile) {
1521                 pr_err("qtaguid: iface_stat: init "
1522                        " failed to create stat_old proc entry\n");
1523                 err = -1;
1524                 goto err_zap_entry;
1525         }
1526
1527         iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1528                                                    proc_iface_perms,
1529                                                    parent_procdir,
1530                                                    &proc_iface_stat_fmt_fops,
1531                                                    (void *)2 /* fmt2 */);
1532         if (!iface_stat_fmt_procfile) {
1533                 pr_err("qtaguid: iface_stat: init "
1534                        " failed to create stat_all proc entry\n");
1535                 err = -1;
1536                 goto err_zap_all_stats_entry;
1537         }
1538
1539
1540         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1541         if (err) {
1542                 pr_err("qtaguid: iface_stat: init "
1543                        "failed to register dev event handler\n");
1544                 goto err_zap_all_stats_entries;
1545         }
1546         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1547         if (err) {
1548                 pr_err("qtaguid: iface_stat: init "
1549                        "failed to register ipv4 dev event handler\n");
1550                 goto err_unreg_nd;
1551         }
1552
1553         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1554         if (err) {
1555                 pr_err("qtaguid: iface_stat: init "
1556                        "failed to register ipv6 dev event handler\n");
1557                 goto err_unreg_ip4_addr;
1558         }
1559         return 0;
1560
1561 err_unreg_ip4_addr:
1562         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1563 err_unreg_nd:
1564         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1565 err_zap_all_stats_entries:
1566         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1567 err_zap_all_stats_entry:
1568         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1569 err_zap_entry:
1570         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1571 err:
1572         return err;
1573 }
1574
/*
 * Look up the full socket owning @skb via the xt_socket slow-path helpers.
 *
 * Only meaningful in the hooks listed in XT_SOCKET_SUPPORTED_HOOKS
 * (PRE_ROUTING / LOCAL_IN); any other hook returns NULL immediately.
 * On success the returned sock carries a reference taken by
 * xt_socket_lookup_slow_v{4,6}(); the caller must drop it with
 * sock_gen_put().  Non-full socks (time-wait / request minisocks) are
 * released here and reported as NULL because they lack the fields the
 * match code reads.
 *
 * NOTE(review): skb->dev is dereferenced via dev_net() without a NULL
 * check here, while account_for_uid() does guard it — confirm the
 * supported hooks guarantee skb->dev is set.
 */
static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
                                    struct xt_action_param *par)
{
        struct sock *sk;
        unsigned int hook_mask = (1 << par->hooknum);

        MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
                 par->hooknum, par->family);

        /*
         * Let's not abuse the xt_socket_get*_sk(), or else it will
         * return garbage SKs.
         */
        if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
                return NULL;

        switch (par->family) {
        case NFPROTO_IPV6:
                sk = xt_socket_lookup_slow_v6(dev_net(skb->dev), skb, par->in);
                break;
        case NFPROTO_IPV4:
                sk = xt_socket_lookup_slow_v4(dev_net(skb->dev), skb, par->in);
                break;
        default:
                return NULL;
        }

        if (sk) {
                MT_DEBUG("qtaguid: %p->sk_proto=%u "
                         "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
                /*
                 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
                 * "struct inet_timewait_sock" which is missing fields.
                 */
                if (!sk_fullsock(sk) || sk->sk_state  == TCP_TIME_WAIT) {
                        /* Drop the lookup reference before discarding. */
                        sock_gen_put(sk);
                        sk = NULL;
                }
        }
        return sk;
}
1616
/*
 * Charge @skb's bytes to @uid on the interface the packet traverses.
 *
 * Picks the accounting device: prefers skb->dev, falling back to
 * par->in (RX) or par->out (TX) when skb->dev is unset.  Direction is
 * derived from which of par->in/par->out is non-NULL.  The sk used for
 * stats is skb->sk when present, otherwise @alternate_sk (the one
 * found via qtaguid_find_sk(), possibly NULL).
 */
static void account_for_uid(const struct sk_buff *skb,
                            const struct sock *alternate_sk, uid_t uid,
                            struct xt_action_param *par)
{
        const struct net_device *el_dev;

        if (!skb->dev) {
                MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
                el_dev = par->in ? : par->out;
        } else {
                const struct net_device *other_dev;
                el_dev = skb->dev;
                other_dev = par->in ? : par->out;
                /* Log (debug only) when skb->dev disagrees with par->in/out. */
                if (el_dev != other_dev) {
                        MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
                                "par->(in/out)=%p %s\n",
                                par->hooknum, el_dev, el_dev->name, other_dev,
                                other_dev->name);
                }
        }

        if (unlikely(!el_dev)) {
                pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
        } else if (unlikely(!el_dev->name)) {
                pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
        } else {
                int proto = ipx_proto(skb, par);
                MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
                         par->hooknum, el_dev->name, el_dev->type,
                         par->family, proto);

                /* par->in set => packet is inbound (IFS_RX), else IFS_TX. */
                if_tag_stat_update(el_dev->name, uid,
                                skb->sk ? skb->sk : alternate_sk,
                                par->in ? IFS_RX : IFS_TX,
                                proto, skb->len);
        }
}
1654
/*
 * The xt match entry point: decides whether @skb matches the qtaguid
 * rule in @par->matchinfo, and (as a side effect) accounts traffic.
 *
 * Flow:
 *   - passive mode / NULL skb: answer from match^invert only, no stats.
 *   - PRE/POST_ROUTING hooks: update iface stats and return early; the
 *     per-UID work happens when the skb is seen again in other hooks.
 *   - Otherwise resolve the owning socket (skb->sk, else conntrack-style
 *     lookup via qtaguid_find_sk()), then evaluate the UID/GID/SOCKET
 *     sub-matches against the socket file's credentials.
 *
 * Locking/refcounts: sk->sk_callback_lock is read-held while the
 * sk_socket/file is inspected; a sock obtained from qtaguid_find_sk()
 * (got_sock) is released with sock_gen_put() on every exit path via
 * the put_sock_ret_res label.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
        const struct xt_qtaguid_match_info *info = par->matchinfo;
        const struct file *filp;
        bool got_sock = false;
        struct sock *sk;
        kuid_t sock_uid;
        bool res;
        bool set_sk_callback_lock = false;

        if (unlikely(module_passive))
                return (info->match ^ info->invert) == 0;

        MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
                 par->hooknum, skb, par->in, par->out, par->family);

        atomic64_inc(&qtu_events.match_calls);
        if (skb == NULL) {
                res = (info->match ^ info->invert) == 0;
                goto ret_res;
        }

        switch (par->hooknum) {
        case NF_INET_PRE_ROUTING:
        case NF_INET_POST_ROUTING:
                atomic64_inc(&qtu_events.match_calls_prepost);
                iface_stat_update_from_skb(skb, par);
                /*
                 * We are done in pre/post. The skb will get processed
                 * further later.
                 */
                res = (info->match ^ info->invert);
                goto ret_res;
                break;
        /* default: Fall through and do UID related work */
        }

        sk = skb_to_full_sk(skb);
        /*
         * When in TCP_TIME_WAIT the sk is not a "struct sock" but
         * "struct inet_timewait_sock" which is missing fields.
         * So we ignore it.
         */
        if (sk && sk->sk_state == TCP_TIME_WAIT)
                sk = NULL;
        if (sk == NULL) {
                /*
                 * A missing sk->sk_socket happens when packets are in-flight
                 * and the matching socket is already closed and gone.
                 */
                sk = qtaguid_find_sk(skb, par);
                /*
                 * If we got the socket from the find_sk(), we will need to put
                 * it back, as nf_tproxy_get_sock_v4() got it.
                 */
                got_sock = sk;
                if (sk)
                        atomic64_inc(&qtu_events.match_found_sk_in_ct);
                else
                        atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
        } else {
                atomic64_inc(&qtu_events.match_found_sk);
        }
        MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
                 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
        if (sk != NULL) {
                /* Hold sk_callback_lock while peeking at sk_socket/file. */
                set_sk_callback_lock = true;
                read_lock_bh(&sk->sk_callback_lock);
                MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
                        par->hooknum, sk, sk->sk_socket,
                        sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
                filp = sk->sk_socket ? sk->sk_socket->file : NULL;
                MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
                        par->hooknum, filp ? from_kuid(&init_user_ns, filp->f_cred->fsuid) : -1);
        }

        if (sk == NULL || sk->sk_socket == NULL) {
                /*
                 * Here, the qtaguid_find_sk() using connection tracking
                 * couldn't find the owner, so for now we just count them
                 * against the system.
                 */
                /*
                 * TODO: unhack how to force just accounting.
                 * For now we only do iface stats when the uid-owner is not
                 * requested.
                 */
                if (!(info->match & XT_QTAGUID_UID))
                        account_for_uid(skb, sk, 0, par);
                MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
                        par->hooknum,
                        sk ? sk->sk_socket : NULL);
                res = (info->match ^ info->invert) == 0;
                atomic64_inc(&qtu_events.match_no_sk);
                goto put_sock_ret_res;
        } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
                /* Rule wants "no socket" but we found one: no match. */
                res = false;
                goto put_sock_ret_res;
        }
        filp = sk->sk_socket->file;
        if (filp == NULL) {
                MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
                account_for_uid(skb, sk, 0, par);
                res = ((info->match ^ info->invert) &
                        (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
                atomic64_inc(&qtu_events.match_no_sk_file);
                goto put_sock_ret_res;
        }
        sock_uid = filp->f_cred->fsuid;
        /*
         * TODO: unhack how to force just accounting.
         * For now we only do iface stats when the uid-owner is not requested
         */
        if (!(info->match & XT_QTAGUID_UID))
                account_for_uid(skb, sk, from_kuid(&init_user_ns, sock_uid), par);

        /*
         * The following two tests fail the match when:
         *    id not in range AND no inverted condition requested
         * or id     in range AND    inverted condition requested
         * Thus (!a && b) || (a && !b) == a ^ b
         */
        if (info->match & XT_QTAGUID_UID) {
                kuid_t uid_min = make_kuid(&init_user_ns, info->uid_min);
                kuid_t uid_max = make_kuid(&init_user_ns, info->uid_max);

                if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
                     uid_lte(filp->f_cred->fsuid, uid_max)) ^
                    !(info->invert & XT_QTAGUID_UID)) {
                        MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
                                 par->hooknum);
                        res = false;
                        goto put_sock_ret_res;
                }
        }
        if (info->match & XT_QTAGUID_GID) {
                kgid_t gid_min = make_kgid(&init_user_ns, info->gid_min);
                kgid_t gid_max = make_kgid(&init_user_ns, info->gid_max);

                if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
                                gid_lte(filp->f_cred->fsgid, gid_max)) ^
                        !(info->invert & XT_QTAGUID_GID)) {
                        MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
                                par->hooknum);
                        res = false;
                        goto put_sock_ret_res;
                }
        }
        MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
        res = true;

put_sock_ret_res:
        /* Release the lookup reference / callback lock taken above. */
        if (got_sock)
                sock_gen_put(sk);
        if (set_sk_callback_lock)
                read_unlock_bh(&sk->sk_callback_lock);
ret_res:
        MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
        return res;
}
1815
#ifdef DDEBUG
/* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the module's full internal state (sock tags, uid tag data,
 * proc qtu data, iface stats) to the kernel log, prefixed by a
 * printf-style header built from @fmt.  No-op unless DDEBUG_MASK is
 * set in qtaguid_debug_mask.  Takes each tree's lock only for the
 * duration of its dump; the overall snapshot is not atomic.
 */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
{
        va_list args;
        char *fmt_buff;
        char *buff;

        if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
                return;

        /* Two-step format: wrap caller's fmt in a header, then expand args. */
        fmt_buff = kasprintf(GFP_ATOMIC,
                             "qtaguid: %s(): %s {\n", __func__, fmt);
        BUG_ON(!fmt_buff);
        va_start(args, fmt);
        buff = kvasprintf(GFP_ATOMIC,
                          fmt_buff, args);
        BUG_ON(!buff);
        pr_debug("%s", buff);
        kfree(fmt_buff);
        kfree(buff);
        va_end(args);

        spin_lock_bh(&sock_tag_list_lock);
        prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
        spin_unlock_bh(&sock_tag_list_lock);

        /* Lock order: sock_tag_list_lock before uid_tag_data_tree_lock. */
        spin_lock_bh(&sock_tag_list_lock);
        spin_lock_bh(&uid_tag_data_tree_lock);
        prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
        prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
        spin_unlock_bh(&uid_tag_data_tree_lock);
        spin_unlock_bh(&sock_tag_list_lock);

        spin_lock_bh(&iface_stat_list_lock);
        prdebug_iface_stat_list(indent_level, &iface_stat_list);
        spin_unlock_bh(&iface_stat_list_lock);

        pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
/* Stub when DDEBUG is off: keeps call sites compiling with zero cost. */
static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
#endif
1859
/*
 * Per-reader cursor for the ctrl seq_file: remembers where the previous
 * read stopped so start() can resume without re-walking the tree.
 */
struct proc_ctrl_print_info {
        struct sock *sk; /* socket found by reading to sk_pos */
        loff_t sk_pos;   /* seq_file position that ->sk corresponds to */
};
1864
1865 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1866 {
1867         struct proc_ctrl_print_info *pcpi = m->private;
1868         struct sock_tag *sock_tag_entry = v;
1869         struct rb_node *node;
1870
1871         (*pos)++;
1872
1873         if (!v || v  == SEQ_START_TOKEN)
1874                 return NULL;
1875
1876         node = rb_next(&sock_tag_entry->sock_node);
1877         if (!node) {
1878                 pcpi->sk = NULL;
1879                 sock_tag_entry = SEQ_START_TOKEN;
1880         } else {
1881                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1882                 pcpi->sk = sock_tag_entry->sk;
1883         }
1884         pcpi->sk_pos = *pos;
1885         return sock_tag_entry;
1886 }
1887
/*
 * seq_file .start: begin (or resume) iterating the sock_tag_tree.
 *
 * Takes sock_tag_list_lock and intentionally returns with it held;
 * the matching .stop callback releases it, so the lock covers the
 * whole show sequence.  At *pos == 0 iteration starts from the first
 * tree node (or directly at SEQ_START_TOKEN if the tree is empty);
 * otherwise it resumes from the cursor saved in m->private, correcting
 * for a skipped next() call when *pos got ahead of the cursor.
 */
static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
{
        struct proc_ctrl_print_info *pcpi = m->private;
        struct sock_tag *sock_tag_entry;
        struct rb_node *node;

        spin_lock_bh(&sock_tag_list_lock);

        /* Lock stays held even on this early return; stop() unlocks. */
        if (unlikely(module_passive))
                return NULL;

        if (*pos == 0) {
                pcpi->sk_pos = 0;
                node = rb_first(&sock_tag_tree);
                if (!node) {
                        pcpi->sk = NULL;
                        return SEQ_START_TOKEN;
                }
                sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
                pcpi->sk = sock_tag_entry->sk;
        } else {
                sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
                                                NULL) ?: SEQ_START_TOKEN;
                if (*pos != pcpi->sk_pos) {
                        /* seq_read skipped a next call */
                        *pos = pcpi->sk_pos;
                        return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
                }
        }
        return sock_tag_entry;
}
1919
/* seq_file .stop: drops the lock acquired by qtaguid_ctrl_proc_start(). */
static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
{
        spin_unlock_bh(&sock_tag_list_lock);
}
1924
1925 /*
1926  * Procfs reader to get all active socket tags using style "1)" as described in
1927  * fs/proc/generic.c
1928  */
1929 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1930 {
1931         struct sock_tag *sock_tag_entry = v;
1932         uid_t uid;
1933         long f_count;
1934
1935         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1936                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
1937
1938         if (sock_tag_entry != SEQ_START_TOKEN) {
1939                 uid = get_uid_from_tag(sock_tag_entry->tag);
1940                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1941                          "pid=%u\n",
1942                          sock_tag_entry->sk,
1943                          sock_tag_entry->tag,
1944                          uid,
1945                          sock_tag_entry->pid
1946                         );
1947                 f_count = atomic_long_read(
1948                         &sock_tag_entry->socket->file->f_count);
1949                 seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
1950                            "f_count=%lu\n",
1951                            sock_tag_entry->sk,
1952                            sock_tag_entry->tag, uid,
1953                            sock_tag_entry->pid, f_count);
1954         } else {
1955                 seq_printf(m, "events: sockets_tagged=%llu "
1956                            "sockets_untagged=%llu "
1957                            "counter_set_changes=%llu "
1958                            "delete_cmds=%llu "
1959                            "iface_events=%llu "
1960                            "match_calls=%llu "
1961                            "match_calls_prepost=%llu "
1962                            "match_found_sk=%llu "
1963                            "match_found_sk_in_ct=%llu "
1964                            "match_found_no_sk_in_ct=%llu "
1965                            "match_no_sk=%llu "
1966                            "match_no_sk_file=%llu\n",
1967                            (u64)atomic64_read(&qtu_events.sockets_tagged),
1968                            (u64)atomic64_read(&qtu_events.sockets_untagged),
1969                            (u64)atomic64_read(&qtu_events.counter_set_changes),
1970                            (u64)atomic64_read(&qtu_events.delete_cmds),
1971                            (u64)atomic64_read(&qtu_events.iface_events),
1972                            (u64)atomic64_read(&qtu_events.match_calls),
1973                            (u64)atomic64_read(&qtu_events.match_calls_prepost),
1974                            (u64)atomic64_read(&qtu_events.match_found_sk),
1975                            (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
1976                            (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1977                            (u64)atomic64_read(&qtu_events.match_no_sk),
1978                            (u64)atomic64_read(&qtu_events.match_no_sk_file));
1979
1980                 /* Count the following as part of the last item_index */
1981                 prdebug_full_state(0, "proc ctrl");
1982         }
1983
1984         return 0;
1985 }
1986
1987 /*
1988  * Delete socket tags, and stat tags associated with a given
1989  * accouting tag and uid.
1990  */
1991 static int ctrl_cmd_delete(const char *input)
1992 {
1993         char cmd;
1994         int uid_int;
1995         kuid_t uid;
1996         uid_t entry_uid;
1997         tag_t acct_tag;
1998         tag_t tag;
1999         int res, argc;
2000         struct iface_stat *iface_entry;
2001         struct rb_node *node;
2002         struct sock_tag *st_entry;
2003         struct rb_root st_to_free_tree = RB_ROOT;
2004         struct tag_stat *ts_entry;
2005         struct tag_counter_set *tcs_entry;
2006         struct tag_ref *tr_entry;
2007         struct uid_tag_data *utd_entry;
2008
2009         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid_int);
2010         uid = make_kuid(&init_user_ns, uid_int);
2011         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2012                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2013                  acct_tag, uid_int);
2014         if (argc < 2) {
2015                 res = -EINVAL;
2016                 goto err;
2017         }
2018         if (!valid_atag(acct_tag)) {
2019                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2020                 res = -EINVAL;
2021                 goto err;
2022         }
2023         if (argc < 3) {
2024                 uid = current_fsuid();
2025                 uid_int = from_kuid(&init_user_ns, uid);
2026         } else if (!can_impersonate_uid(uid)) {
2027                 pr_info("qtaguid: ctrl_delete(%s): "
2028                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2029                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2030                 res = -EPERM;
2031                 goto err;
2032         }
2033
2034         tag = combine_atag_with_uid(acct_tag, uid_int);
2035         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2036                  "looking for tag=0x%llx (uid=%u)\n",
2037                  input, tag, uid_int);
2038
2039         /* Delete socket tags */
2040         spin_lock_bh(&sock_tag_list_lock);
2041         node = rb_first(&sock_tag_tree);
2042         while (node) {
2043                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2044                 entry_uid = get_uid_from_tag(st_entry->tag);
2045                 node = rb_next(node);
2046                 if (entry_uid != uid_int)
2047                         continue;
2048
2049                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2050                          input, st_entry->tag, entry_uid);
2051
2052                 if (!acct_tag || st_entry->tag == tag) {
2053                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2054                         /* Can't sockfd_put() within spinlock, do it later. */
2055                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2056                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2057                         BUG_ON(tr_entry->num_sock_tags <= 0);
2058                         tr_entry->num_sock_tags--;
2059                         /*
2060                          * TODO: remove if, and start failing.
2061                          * This is a hack to work around the fact that in some
2062                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2063                          * and are trying to work around apps
2064                          * that didn't open the /dev/xt_qtaguid.
2065                          */
2066                         if (st_entry->list.next && st_entry->list.prev)
2067                                 list_del(&st_entry->list);
2068                 }
2069         }
2070         spin_unlock_bh(&sock_tag_list_lock);
2071
2072         sock_tag_tree_erase(&st_to_free_tree);
2073
2074         /* Delete tag counter-sets */
2075         spin_lock_bh(&tag_counter_set_list_lock);
2076         /* Counter sets are only on the uid tag, not full tag */
2077         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2078         if (tcs_entry) {
2079                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2080                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2081                          input,
2082                          tcs_entry->tn.tag,
2083                          get_uid_from_tag(tcs_entry->tn.tag),
2084                          tcs_entry->active_set);
2085                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2086                 kfree(tcs_entry);
2087         }
2088         spin_unlock_bh(&tag_counter_set_list_lock);
2089
2090         /*
2091          * If acct_tag is 0, then all entries belonging to uid are
2092          * erased.
2093          */
2094         spin_lock_bh(&iface_stat_list_lock);
2095         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2096                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2097                 node = rb_first(&iface_entry->tag_stat_tree);
2098                 while (node) {
2099                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2100                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2101                         node = rb_next(node);
2102
2103                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2104                                  "ts tag=0x%llx (uid=%u)\n",
2105                                  input, ts_entry->tn.tag, entry_uid);
2106
2107                         if (entry_uid != uid_int)
2108                                 continue;
2109                         if (!acct_tag || ts_entry->tn.tag == tag) {
2110                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2111                                          "erase ts: %s 0x%llx %u\n",
2112                                          input, iface_entry->ifname,
2113                                          get_atag_from_tag(ts_entry->tn.tag),
2114                                          entry_uid);
2115                                 rb_erase(&ts_entry->tn.node,
2116                                          &iface_entry->tag_stat_tree);
2117                                 kfree(ts_entry);
2118                         }
2119                 }
2120                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2121         }
2122         spin_unlock_bh(&iface_stat_list_lock);
2123
2124         /* Cleanup the uid_tag_data */
2125         spin_lock_bh(&uid_tag_data_tree_lock);
2126         node = rb_first(&uid_tag_data_tree);
2127         while (node) {
2128                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2129                 entry_uid = utd_entry->uid;
2130                 node = rb_next(node);
2131
2132                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2133                          "utd uid=%u\n",
2134                          input, entry_uid);
2135
2136                 if (entry_uid != uid_int)
2137                         continue;
2138                 /*
2139                  * Go over the tag_refs, and those that don't have
2140                  * sock_tags using them are freed.
2141                  */
2142                 put_tag_ref_tree(tag, utd_entry);
2143                 put_utd_entry(utd_entry);
2144         }
2145         spin_unlock_bh(&uid_tag_data_tree_lock);
2146
2147         atomic64_inc(&qtu_events.delete_cmds);
2148         res = 0;
2149
2150 err:
2151         return res;
2152 }
2153
/*
 * Switch the active counter set for a uid's tag.
 *
 * Input format (per the sscanf below): "<cmd> <counter_set> <uid>",
 * all three fields required.  counter_set must be in
 * [0, IFS_MAX_COUNTER_SETS) and the caller must pass
 * can_manipulate_uids().  Creates the tag_counter_set node on first
 * use (GFP_ATOMIC, under the list lock).  Returns 0 or -errno.
 */
static int ctrl_cmd_counter_set(const char *input)
{
        char cmd;
        uid_t uid = 0;
        tag_t tag;
        int res, argc;
        struct tag_counter_set *tcs;
        int counter_set;

        argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
        CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
                 "set=%d uid=%u\n", input, argc, cmd,
                 counter_set, uid);
        if (argc != 3) {
                res = -EINVAL;
                goto err;
        }
        if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
                pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
                        input);
                res = -EINVAL;
                goto err;
        }
        if (!can_manipulate_uids()) {
                pr_info("qtaguid: ctrl_counterset(%s): "
                        "insufficient priv from pid=%u tgid=%u uid=%u\n",
                        input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
                res = -EPERM;
                goto err;
        }

        /* Counter sets are keyed on the uid-only tag (acct_tag == 0). */
        tag = make_tag_from_uid(uid);
        spin_lock_bh(&tag_counter_set_list_lock);
        tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
        if (!tcs) {
                tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
                if (!tcs) {
                        spin_unlock_bh(&tag_counter_set_list_lock);
                        pr_err("qtaguid: ctrl_counterset(%s): "
                               "failed to alloc counter set\n",
                               input);
                        res = -ENOMEM;
                        goto err;
                }
                tcs->tn.tag = tag;
                tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
                CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
                         "(uid=%u) set=%d\n",
                         input, tag, get_uid_from_tag(tag), counter_set);
        }
        tcs->active_set = counter_set;
        spin_unlock_bh(&tag_counter_set_list_lock);
        atomic64_inc(&qtu_events.counter_set_changes);
        res = 0;

err:
        return res;
}
2212
2213 static int ctrl_cmd_tag(const char *input)
2214 {
2215         char cmd;
2216         int sock_fd = 0;
2217         kuid_t uid;
2218         unsigned int uid_int = 0;
2219         tag_t acct_tag = make_atag_from_value(0);
2220         tag_t full_tag;
2221         struct socket *el_socket;
2222         int res, argc;
2223         struct sock_tag *sock_tag_entry;
2224         struct tag_ref *tag_ref_entry;
2225         struct uid_tag_data *uid_tag_data_entry;
2226         struct proc_qtu_data *pqd_entry;
2227
2228         /* Unassigned args will get defaulted later. */
2229         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid_int);
2230         uid = make_kuid(&init_user_ns, uid_int);
2231         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2232                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2233                  acct_tag, uid_int);
2234         if (argc < 2) {
2235                 res = -EINVAL;
2236                 goto err;
2237         }
2238         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2239         if (!el_socket) {
2240                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2241                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2242                         input, sock_fd, res, current->pid, current->tgid,
2243                         from_kuid(&init_user_ns, current_fsuid()));
2244                 goto err;
2245         }
2246         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2247                  input, atomic_long_read(&el_socket->file->f_count),
2248                  el_socket->sk);
2249         if (argc < 3) {
2250                 acct_tag = make_atag_from_value(0);
2251         } else if (!valid_atag(acct_tag)) {
2252                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2253                 res = -EINVAL;
2254                 goto err_put;
2255         }
2256         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2257                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2258                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2259                  input, current->pid, current->tgid,
2260                  from_kuid(&init_user_ns, current_uid()),
2261                  from_kuid(&init_user_ns, current_euid()),
2262                  from_kuid(&init_user_ns, current_fsuid()),
2263                  from_kgid(&init_user_ns, xt_qtaguid_ctrl_file->gid),
2264                  in_group_p(xt_qtaguid_ctrl_file->gid),
2265                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2266         if (argc < 4) {
2267                 uid = current_fsuid();
2268                 uid_int = from_kuid(&init_user_ns, uid);
2269         } else if (!can_impersonate_uid(uid)) {
2270                 pr_info("qtaguid: ctrl_tag(%s): "
2271                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2272                         input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2273                 res = -EPERM;
2274                 goto err_put;
2275         }
2276         full_tag = combine_atag_with_uid(acct_tag, uid_int);
2277
2278         spin_lock_bh(&sock_tag_list_lock);
2279         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2280         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2281         if (IS_ERR(tag_ref_entry)) {
2282                 res = PTR_ERR(tag_ref_entry);
2283                 spin_unlock_bh(&sock_tag_list_lock);
2284                 goto err_put;
2285         }
2286         tag_ref_entry->num_sock_tags++;
2287         if (sock_tag_entry) {
2288                 struct tag_ref *prev_tag_ref_entry;
2289
2290                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2291                          "st@%p ...->f_count=%ld\n",
2292                          input, el_socket->sk, sock_tag_entry,
2293                          atomic_long_read(&el_socket->file->f_count));
2294                 /*
2295                  * This is a re-tagging, so release the sock_fd that was
2296                  * locked at the time of the 1st tagging.
2297                  * There is still the ref from this call's sockfd_lookup() so
2298                  * it can be done within the spinlock.
2299                  */
2300                 sockfd_put(sock_tag_entry->socket);
2301                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2302                                                     &uid_tag_data_entry);
2303                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2304                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2305                 prev_tag_ref_entry->num_sock_tags--;
2306                 sock_tag_entry->tag = full_tag;
2307         } else {
2308                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2309                          input, el_socket->sk);
2310                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2311                                          GFP_ATOMIC);
2312                 if (!sock_tag_entry) {
2313                         pr_err("qtaguid: ctrl_tag(%s): "
2314                                "socket tag alloc failed\n",
2315                                input);
2316                         spin_unlock_bh(&sock_tag_list_lock);
2317                         res = -ENOMEM;
2318                         goto err_tag_unref_put;
2319                 }
2320                 sock_tag_entry->sk = el_socket->sk;
2321                 sock_tag_entry->socket = el_socket;
2322                 sock_tag_entry->pid = current->tgid;
2323                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag, uid_int);
2324                 spin_lock_bh(&uid_tag_data_tree_lock);
2325                 pqd_entry = proc_qtu_data_tree_search(
2326                         &proc_qtu_data_tree, current->tgid);
2327                 /*
2328                  * TODO: remove if, and start failing.
2329                  * At first, we want to catch user-space code that is not
2330                  * opening the /dev/xt_qtaguid.
2331                  */
2332                 if (IS_ERR_OR_NULL(pqd_entry))
2333                         pr_warn_once(
2334                                 "qtaguid: %s(): "
2335                                 "User space forgot to open /dev/xt_qtaguid? "
2336                                 "pid=%u tgid=%u uid=%u\n", __func__,
2337                                 current->pid, current->tgid,
2338                                 from_kuid(&init_user_ns, current_fsuid()));
2339                 else
2340                         list_add(&sock_tag_entry->list,
2341                                  &pqd_entry->sock_tag_list);
2342                 spin_unlock_bh(&uid_tag_data_tree_lock);
2343
2344                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2345                 atomic64_inc(&qtu_events.sockets_tagged);
2346         }
2347         spin_unlock_bh(&sock_tag_list_lock);
2348         /* We keep the ref to the socket (file) until it is untagged */
2349         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2350                  input, sock_tag_entry,
2351                  atomic_long_read(&el_socket->file->f_count));
2352         return 0;
2353
2354 err_tag_unref_put:
2355         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2356         tag_ref_entry->num_sock_tags--;
2357         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2358 err_put:
2359         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2360                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2361         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2362         sockfd_put(el_socket);
2363         return res;
2364
2365 err:
2366         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2367         return res;
2368 }
2369
/*
 * Handle the "u <sock_fd>" ctrl command: remove the tag from a socket
 * previously tagged via the "t" command, dropping the extra file
 * reference that was kept at tag time.
 * Returns 0 on success or a negative errno.
 */
static int ctrl_cmd_untag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;

	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
		 input, argc, cmd, sock_fd);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			from_kuid(&init_user_ns, current_fsuid()));
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
		 el_socket->sk);
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		/* Socket was never tagged: nothing to untag. */
		spin_unlock_bh(&sock_tag_list_lock);
		res = -EINVAL;
		goto err_put;
	}
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	/* Lock order: sock_tag_list_lock then uid_tag_data_tree_lock. */
	spin_lock_bh(&uid_tag_data_tree_lock);
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, current->tgid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry))
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u uid=%u\n", __func__,
			     current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
	else
		list_del(&sock_tag_entry->list);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sock_fd that was grabbed at tag time,
	 * and once more for the sockfd_lookup() here.
	 */
	sockfd_put(sock_tag_entry->socket);
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count) - 1);
	sockfd_put(el_socket);

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	return 0;

err_put:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
	return res;
}
2463
2464 static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
2465 {
2466         char cmd;
2467         ssize_t res;
2468
2469         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2470                  input, current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2471
2472         cmd = input[0];
2473         /* Collect params for commands */
2474         switch (cmd) {
2475         case 'd':
2476                 res = ctrl_cmd_delete(input);
2477                 break;
2478
2479         case 's':
2480                 res = ctrl_cmd_counter_set(input);
2481                 break;
2482
2483         case 't':
2484                 res = ctrl_cmd_tag(input);
2485                 break;
2486
2487         case 'u':
2488                 res = ctrl_cmd_untag(input);
2489                 break;
2490
2491         default:
2492                 res = -EINVAL;
2493                 goto err;
2494         }
2495         if (!res)
2496                 res = count;
2497 err:
2498         CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
2499         return res;
2500 }
2501
2502 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2503 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2504                                    size_t count, loff_t *offp)
2505 {
2506         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2507
2508         if (unlikely(module_passive))
2509                 return count;
2510
2511         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2512                 return -EINVAL;
2513
2514         if (copy_from_user(input_buf, buffer, count))
2515                 return -EFAULT;
2516
2517         input_buf[count] = '\0';
2518         return qtaguid_ctrl_parse(input_buf, count);
2519 }
2520
/*
 * Per-reader cursor state for the seq_file walk over per-interface tag
 * stats.  Records the interface being walked and the last tag emitted
 * so a later seq_read() restart can resume mid-tree.
 */
struct proc_print_info {
	struct iface_stat *iface_entry;
	int item_index;
	tag_t tag; /* tag found by reading to tag_pos */
	off_t tag_pos;
	int tag_item_index;
};
2528
/* Emit the column header line for /proc/net/xt_qtaguid/stats. */
static void pp_stats_header(struct seq_file *m)
{
	seq_puts(m,
		 "idx iface acct_tag_hex uid_tag_int cnt_set "
		 "rx_bytes rx_packets "
		 "tx_bytes tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n");
}
2542
2543 static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
2544                          int cnt_set)
2545 {
2546         struct data_counters *cnts;
2547         tag_t tag = ts_entry->tn.tag;
2548         uid_t stat_uid = get_uid_from_tag(tag);
2549         struct proc_print_info *ppi = m->private;
2550         /* Detailed tags are not available to everybody */
2551         if (get_atag_from_tag(tag) && !can_read_other_uid_stats(
2552                                                 make_kuid(&init_user_ns,stat_uid))) {
2553                 CT_DEBUG("qtaguid: stats line: "
2554                          "%s 0x%llx %u: insufficient priv "
2555                          "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2556                          ppi->iface_entry->ifname,
2557                          get_atag_from_tag(tag), stat_uid,
2558                          current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2559                          from_kgid(&init_user_ns,xt_qtaguid_stats_file->gid));
2560                 return 0;
2561         }
2562         ppi->item_index++;
2563         cnts = &ts_entry->counters;
2564         seq_printf(m, "%d %s 0x%llx %u %u "
2565                 "%llu %llu "
2566                 "%llu %llu "
2567                 "%llu %llu "
2568                 "%llu %llu "
2569                 "%llu %llu "
2570                 "%llu %llu "
2571                 "%llu %llu "
2572                 "%llu %llu\n",
2573                 ppi->item_index,
2574                 ppi->iface_entry->ifname,
2575                 get_atag_from_tag(tag),
2576                 stat_uid,
2577                 cnt_set,
2578                 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2579                 dc_sum_packets(cnts, cnt_set, IFS_RX),
2580                 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2581                 dc_sum_packets(cnts, cnt_set, IFS_TX),
2582                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2583                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2584                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2585                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2586                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2587                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2588                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2589                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2590                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2591                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2592                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2593                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2594         return seq_has_overflowed(m) ? -ENOSPC : 1;
2595 }
2596
2597 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2598 {
2599         int ret;
2600         int counter_set;
2601         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2602              counter_set++) {
2603                 ret = pp_stats_line(m, ts_entry, counter_set);
2604                 if (ret < 0)
2605                         return false;
2606         }
2607         return true;
2608 }
2609
2610 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2611 {
2612         struct iface_stat *iface_entry;
2613
2614         if (!ptr)
2615                 return false;
2616
2617         list_for_each_entry(iface_entry, &iface_stat_list, list)
2618                 if (iface_entry == ptr)
2619                         return true;
2620         return false;
2621 }
2622
/*
 * Advance ppi->iface_entry to the next interface on iface_stat_list,
 * handing the per-interface tag_stat_list_lock from the old entry to
 * the new one.  Sets ppi->iface_entry to NULL when the list is
 * exhausted.  Caller must hold iface_stat_list_lock and the current
 * entry's tag_stat_list_lock.
 */
static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
{
	spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
	/* The loop body runs at most once: lock the next entry and stop. */
	list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		return;
	}
	ppi->iface_entry = NULL;
}
2632
/*
 * seq_file ->next: advance to the next tag_stat entry, crossing into
 * the next interface's tree when the current one is exhausted.
 * Records tag/pos/item_index in ppi so ->start can resume a later
 * read.  Runs with the locks taken in ->start held.
 */
static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry;
	struct rb_node *node;

	if (!v) {
		pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
		return NULL;
	}

	(*pos)++;

	if (!ppi->iface_entry || unlikely(module_passive))
		return NULL;

	if (v == SEQ_START_TOKEN)
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	else
		node = rb_next(&((struct tag_stat *)v)->tn.node);

	/* Skip over interfaces whose tag_stat trees are empty. */
	while (!node) {
		qtaguid_stats_proc_next_iface_entry(ppi);
		if (!ppi->iface_entry)
			return NULL;
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	}

	ts_entry = rb_entry(node, struct tag_stat, tn.node);
	/* Remember the position so a later ->start can resume here. */
	ppi->tag = ts_entry->tn.tag;
	ppi->tag_pos = *pos;
	ppi->tag_item_index = ppi->item_index;
	return ts_entry;
}
2667
/*
 * seq_file ->start: take iface_stat_list_lock (held until ->stop) and
 * position the cursor.  *pos == 0 begins a fresh walk; otherwise the
 * iface/tag remembered by the previous ->next is re-validated and
 * looked up again, since entries may have gone away between reads.
 */
static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry = NULL;

	spin_lock_bh(&iface_stat_list_lock);

	if (*pos == 0) {
		/* Fresh walk: reset the cursor to the first interface. */
		ppi->item_index = 1;
		ppi->tag_pos = 0;
		if (list_empty(&iface_stat_list)) {
			ppi->iface_entry = NULL;
		} else {
			ppi->iface_entry = list_first_entry(&iface_stat_list,
							    struct iface_stat,
							    list);
			spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		}
		return SEQ_START_TOKEN;
	}
	if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
		if (ppi->iface_entry) {
			pr_err("qtaguid: %s(): iface_entry %p not found\n",
			       __func__, ppi->iface_entry);
			ppi->iface_entry = NULL;
		}
		return NULL;
	}

	spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);

	if (!ppi->tag_pos) {
		/* seq_read skipped first next call */
		ts_entry = SEQ_START_TOKEN;
	} else {
		ts_entry = tag_stat_tree_search(
				&ppi->iface_entry->tag_stat_tree, ppi->tag);
		if (!ts_entry) {
			/* ->stop will release the locks taken above. */
			pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
				__func__, ppi->tag);
			return NULL;
		}
	}

	if (*pos == ppi->tag_pos) { /* normal resume */
		ppi->item_index = ppi->tag_item_index;
	} else {
		/* seq_read skipped a next call */
		*pos = ppi->tag_pos;
		ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
	}

	return ts_entry;
}
2722
/*
 * seq_file ->stop: drop the per-interface lock (if a walk was in
 * progress) and the list lock taken in ->start.
 */
static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
{
	struct proc_print_info *ppi = m->private;
	if (ppi->iface_entry)
		spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
	spin_unlock_bh(&iface_stat_list_lock);
}
2730
2731 /*
2732  * Procfs reader to get all tag stats using style "1)" as described in
2733  * fs/proc/generic.c
2734  * Groups all protocols tx/rx bytes.
2735  */
2736 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2737 {
2738         struct tag_stat *ts_entry = v;
2739
2740         if (v == SEQ_START_TOKEN)
2741                 pp_stats_header(m);
2742         else
2743                 pp_sets(m, ts_entry);
2744
2745         return 0;
2746 }
2747
2748 /*------------------------------------------*/
2749 static int qtudev_open(struct inode *inode, struct file *file)
2750 {
2751         struct uid_tag_data *utd_entry;
2752         struct proc_qtu_data  *pqd_entry;
2753         struct proc_qtu_data  *new_pqd_entry;
2754         int res;
2755         bool utd_entry_found;
2756
2757         if (unlikely(qtu_proc_handling_passive))
2758                 return 0;
2759
2760         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2761                  current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2762
2763         spin_lock_bh(&uid_tag_data_tree_lock);
2764
2765         /* Look for existing uid data, or alloc one. */
2766         utd_entry = get_uid_data(from_kuid(&init_user_ns, current_fsuid()), &utd_entry_found);
2767         if (IS_ERR_OR_NULL(utd_entry)) {
2768                 res = PTR_ERR(utd_entry);
2769                 goto err_unlock;
2770         }
2771
2772         /* Look for existing PID based proc_data */
2773         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2774                                               current->tgid);
2775         if (pqd_entry) {
2776                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2777                        "%s already opened\n",
2778                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()),
2779                        QTU_DEV_NAME);
2780                 res = -EBUSY;
2781                 goto err_unlock_free_utd;
2782         }
2783
2784         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2785         if (!new_pqd_entry) {
2786                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2787                        "proc data alloc failed\n",
2788                        current->pid, current->tgid, from_kuid(&init_user_ns, current_fsuid()));
2789                 res = -ENOMEM;
2790                 goto err_unlock_free_utd;
2791         }
2792         new_pqd_entry->pid = current->tgid;
2793         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2794         new_pqd_entry->parent_tag_data = utd_entry;
2795         utd_entry->num_pqd++;
2796
2797         proc_qtu_data_tree_insert(new_pqd_entry,
2798                                   &proc_qtu_data_tree);
2799
2800         spin_unlock_bh(&uid_tag_data_tree_lock);
2801         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2802                  from_kuid(&init_user_ns, current_fsuid()), new_pqd_entry);
2803         file->private_data = new_pqd_entry;
2804         return 0;
2805
2806 err_unlock_free_utd:
2807         if (!utd_entry_found) {
2808                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2809                 kfree(utd_entry);
2810         }
2811 err_unlock:
2812         spin_unlock_bh(&uid_tag_data_tree_lock);
2813         return res;
2814 }
2815
/*
 * Release handler for /dev/xt_qtaguid: untag every socket this process
 * had tagged (walking its pqd sock_tag_list), drop per-tag refcounts,
 * and free the process's proc_qtu_data.  The deferred socket releases
 * happen in sock_tag_tree_erase() after the spinlocks are dropped.
 */
static int qtudev_release(struct inode *inode, struct file *file)
{
	struct proc_qtu_data  *pqd_entry = file->private_data;
	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct list_head *entry, *next;
	struct tag_ref *tr;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	/*
	 * Do not trust the current->pid, it might just be a kworker cleaning
	 * up after a dead proc.
	 */
	DR_DEBUG("qtaguid: qtudev_release(): "
		 "pid=%u tgid=%u uid=%u "
		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
		 pqd_entry, pqd_entry->pid, utd_entry,
		 utd_entry->num_active_tags);

	/* Lock order: sock_tag_list_lock then uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);

	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
		st_entry = list_entry(entry, struct sock_tag, list);
		DR_DEBUG("qtaguid: %s(): "
			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
			 __func__,
			 st_entry, st_entry->sk,
			 current->pid, current->tgid,
			 pqd_entry->parent_tag_data->uid);

		utd_entry = uid_tag_data_tree_search(
			&uid_tag_data_tree,
			get_uid_from_tag(st_entry->tag));
		BUG_ON(IS_ERR_OR_NULL(utd_entry));
		DR_DEBUG("qtaguid: %s(): "
			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
			 st_entry->tag, utd_entry);
		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
					 st_entry->tag);
		BUG_ON(!tr);
		BUG_ON(tr->num_sock_tags <= 0);
		tr->num_sock_tags--;
		free_tag_ref_from_utd_entry(tr, utd_entry);

		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		list_del(&st_entry->list);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);

		/*
		 * Try to free the utd_entry if no other proc_qtu_data is
		 * using it (num_pqd is 0) and it doesn't have active tags
		 * (num_active_tags is 0).
		 */
		put_utd_entry(utd_entry);
	}

	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
	pqd_entry->parent_tag_data->num_pqd--;
	put_utd_entry(pqd_entry->parent_tag_data);
	kfree(pqd_entry);
	file->private_data = NULL;

	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);


	sock_tag_tree_erase(&st_to_free_tree);

	prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
			   current->pid, current->tgid);
	return 0;
}
2895
2896 /*------------------------------------------*/
/* File ops for /dev/xt_qtaguid: only open/release are implemented. */
static const struct file_operations qtudev_fops = {
	.owner = THIS_MODULE,
	.open = qtudev_open,
	.release = qtudev_release,
};

/* The misc device itself; minor number assigned dynamically. */
static struct miscdevice qtu_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = QTU_DEV_NAME,
	.fops = &qtudev_fops,
	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
};
2909
/* seq_file iterator backing /proc/net/xt_qtaguid/ctrl reads. */
static const struct seq_operations proc_qtaguid_ctrl_seqops = {
	.start = qtaguid_ctrl_proc_start,
	.next = qtaguid_ctrl_proc_next,
	.stop = qtaguid_ctrl_proc_stop,
	.show = qtaguid_ctrl_proc_show,
};

/* Allocate per-reader cursor state along with the seq_file. */
static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
				sizeof(struct proc_ctrl_print_info));
}

/* "ctrl" is both readable (seq_file) and writable (commands). */
static const struct file_operations proc_qtaguid_ctrl_fops = {
	.open           = proc_qtaguid_ctrl_open,
	.read           = seq_read,
	.write          = qtaguid_ctrl_proc_write,
	.llseek         = seq_lseek,
	.release        = seq_release_private,
};
2930
/* seq_file iterator backing /proc/net/xt_qtaguid/stats reads. */
static const struct seq_operations proc_qtaguid_stats_seqops = {
	.start = qtaguid_stats_proc_start,
	.next = qtaguid_stats_proc_next,
	.stop = qtaguid_stats_proc_stop,
	.show = qtaguid_stats_proc_show,
};

/* Allocate per-reader cursor state along with the seq_file. */
static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_stats_seqops,
				sizeof(struct proc_print_info));
}

/* "stats" is read-only (no counter-set write support yet). */
static const struct file_operations proc_qtaguid_stats_fops = {
	.open           = proc_qtaguid_stats_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release_private,
};
2950
2951 /*------------------------------------------*/
2952 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2953 {
2954         int ret;
2955         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2956         if (!*res_procdir) {
2957                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2958                 ret = -ENOMEM;
2959                 goto no_dir;
2960         }
2961
2962         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2963                                                 *res_procdir,
2964                                                 &proc_qtaguid_ctrl_fops,
2965                                                 NULL);
2966         if (!xt_qtaguid_ctrl_file) {
2967                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2968                         " file\n");
2969                 ret = -ENOMEM;
2970                 goto no_ctrl_entry;
2971         }
2972
2973         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2974                                                  *res_procdir,
2975                                                  &proc_qtaguid_stats_fops,
2976                                                  NULL);
2977         if (!xt_qtaguid_stats_file) {
2978                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2979                         "file\n");
2980                 ret = -ENOMEM;
2981                 goto no_stats_entry;
2982         }
2983         /*
2984          * TODO: add support counter hacking
2985          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2986          */
2987         return 0;
2988
2989 no_stats_entry:
2990         remove_proc_entry("ctrl", *res_procdir);
2991 no_ctrl_entry:
2992         remove_proc_entry("xt_qtaguid", NULL);
2993 no_dir:
2994         return ret;
2995 }
2996
/* Match registration; NFPROTO_UNSPEC covers both IPv4 and IPv6 tables. */
static struct xt_match qtaguid_mt_reg __read_mostly = {
	/*
	 * This module masquerades as the "owner" module so that iptables
	 * tools can deal with it.
	 */
	.name       = "owner",
	.revision   = 1,
	.family     = NFPROTO_UNSPEC,
	.match      = qtaguid_mt,
	.matchsize  = sizeof(struct xt_qtaguid_match_info),
	.me         = THIS_MODULE,
};
3009
3010 static int __init qtaguid_mt_init(void)
3011 {
3012         if (qtaguid_proc_register(&xt_qtaguid_procdir)
3013             || iface_stat_init(xt_qtaguid_procdir)
3014             || xt_register_match(&qtaguid_mt_reg)
3015             || misc_register(&qtu_device))
3016                 return -1;
3017         return 0;
3018 }
3019
/*
 * TODO: allow unloading of the module.
 * For now stats are permanent.
 * Kconfig forces 'y/n' and never an 'm'.
 */

module_init(qtaguid_mt_init);
MODULE_AUTHOR("jpa <jpa@google.com>");
MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
MODULE_LICENSE("GPL");
/* Also claim the legacy owner/qtaguid match module names. */
MODULE_ALIAS("ipt_owner");
MODULE_ALIAS("ip6t_owner");
MODULE_ALIAS("ipt_qtaguid");
MODULE_ALIAS("ip6t_qtaguid");