netfilter: xt_qtaguid: 3.10 fixes
firefly-linux-kernel-4.4.55.git: net/netfilter/xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
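/*
 * Note (assumed from standard module_param behavior, not from the original
 * comment): debug_mask is declared below with S_IRUGO | S_IWUSR, so it can
 * presumably be set at load time (e.g. "modprobe xt_qtaguid debug_mask=...")
 * or changed by root at runtime via
 * /sys/module/xt_qtaguid/parameters/debug_mask.
 */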
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/ratelimit.h>
23 #include <linux/seq_file.h>
24 #include <linux/skbuff.h>
25 #include <linux/workqueue.h>
26 #include <net/addrconf.h>
27 #include <net/sock.h>
28 #include <net/tcp.h>
29 #include <net/udp.h>
30
31 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
32 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #endif
34
35 #include <linux/netfilter/xt_socket.h>
36 #include "xt_qtaguid_internal.h"
37 #include "xt_qtaguid_print.h"
38 #include "../../fs/proc/internal.h"
39
40 /*
41  * Only use the xt_socket funcs from contexts similar to the ones they are
42  * meant for (the hooks below), to avoid unexpected return values.
43  */
44 #define XT_SOCKET_SUPPORTED_HOOKS \
45         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
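
/*
 * Assumed usage pattern (illustrative, not part of the original source): the
 * matching code is expected to gate xt_socket lookups with a check such as
 *
 *   if ((1 << par->hooknum) & XT_SOCKET_SUPPORTED_HOOKS)
 *           /* ... look up the socket via the xt_socket helpers ... */
 *
 * so that sockets are only resolved in PRE_ROUTING and LOCAL_IN.
 */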
46
47
48 static const char *module_procdirname = "xt_qtaguid";
49 static struct proc_dir_entry *xt_qtaguid_procdir;
50
51 static unsigned int proc_iface_perms = S_IRUGO;
52 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
53
54 static struct proc_dir_entry *xt_qtaguid_stats_file;
55 static unsigned int proc_stats_perms = S_IRUGO;
56 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
57
58 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
59
60 /* Everybody can write, but proc_ctrl_write_limited is true by default,
61  * which limits what can be controlled. See the can_*() functions.
62  */
63 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
64 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
65
66 /* Limited by default, so the gid of the ctrl and stats proc entries
67  * will limit what can be done. See the can_*() functions.
68  */
69 static bool proc_stats_readall_limited = true;
70 static bool proc_ctrl_write_limited = true;
71
72 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
73                    S_IRUGO | S_IWUSR);
74 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
75                    S_IRUGO | S_IWUSR);
76
77 /*
78  * Limit the number of active tags (via socket tags) for a given UID.
79  * Multiple processes could share the UID.
80  */
81 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
82 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
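
/*
 * The limit is enforced in new_tag_ref() below: once a uid already has
 * max_sock_tags active tags, further tagging fails with -EMFILE.
 */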
83
84 /*
85  * After the kernel has initialized this module, it is still possible
86  * to make it passive.
87  * Setting passive to Y:
88  *  - the iface stats handling will not act on notifications.
89  *  - iptables matches will never match.
90  *  - ctrl commands silently succeed.
91  *  - stats are always empty.
92  * This is mostly useful when a bug is suspected.
93  */
94 static bool module_passive;
95 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
96
97 /*
98  * Control how qtaguid data is tracked per proc/uid.
99  * Setting tag_tracking_passive to Y:
100  *  - don't create proc specific structs to track tags
101  *  - don't check whether active tags exceed their limits.
102  *  - don't clean up socket tags on process exits.
103  * This is mostly useful when a bug is suspected.
104  */
105 static bool qtu_proc_handling_passive;
106 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
107                    S_IRUGO | S_IWUSR);
108
109 #define QTU_DEV_NAME "xt_qtaguid"
110
111 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
112 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
113
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname = "iface_stat";
116 static struct proc_dir_entry *iface_stat_procdir;
117 /*
118  * The iface_stat_all* files will go away once userspace gets used to the
119  * new fields that have a format line.
120  */
121 static const char *iface_stat_all_procfilename = "iface_stat_all";
122 static struct proc_dir_entry *iface_stat_all_procfile;
123 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
124 static struct proc_dir_entry *iface_stat_fmt_procfile;
125
126
127 static LIST_HEAD(iface_stat_list);
128 static DEFINE_SPINLOCK(iface_stat_list_lock);
129
130 static struct rb_root sock_tag_tree = RB_ROOT;
131 static DEFINE_SPINLOCK(sock_tag_list_lock);
132
133 static struct rb_root tag_counter_set_tree = RB_ROOT;
134 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
135
136 static struct rb_root uid_tag_data_tree = RB_ROOT;
137 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
138
139 static struct rb_root proc_qtu_data_tree = RB_ROOT;
140 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
141
142 static struct qtaguid_event_counts qtu_events;
143 /*----------------------------------------------*/
144 static bool can_manipulate_uids(void)
145 {
146         /* root pwnd */
147         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
148                 || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
149                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
150 }
151
152 static bool can_impersonate_uid(uid_t uid)
153 {
154         return uid == current_fsuid() || can_manipulate_uids();
155 }
156
157 static bool can_read_other_uid_stats(uid_t uid)
158 {
159         /* root pwnd */
160         return in_egroup_p(xt_qtaguid_stats_file->gid)
161                 || unlikely(!current_fsuid()) || uid == current_fsuid()
162                 || unlikely(!proc_stats_readall_limited)
163                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
164 }
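
/*
 * In short: ctrl writes that touch other uids and cross-uid stat reads are
 * allowed for callers in the respective proc file's group, for root
 * (fsuid 0), for the ctrl file's owner uid, or whenever the corresponding
 * *_limited knob is off; a caller may always read stats for its own uid.
 */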
165
166 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
167                                   enum ifs_tx_rx direction,
168                                   enum ifs_proto ifs_proto,
169                                   int bytes,
170                                   int packets)
171 {
172         counters->bpc[set][direction][ifs_proto].bytes += bytes;
173         counters->bpc[set][direction][ifs_proto].packets += packets;
174 }
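
/*
 * The bpc[][][] array is indexed as [counter_set][direction][protocol]:
 * the uid's active counter set (see get_active_counter_set()), IFS_TX/IFS_RX,
 * and IFS_TCP/IFS_UDP/IFS_PROTO_OTHER. data_counters_update() further below
 * always passes packets == 1, i.e. one skb per call.
 */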
175
176 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
177 {
178         struct rb_node *node = root->rb_node;
179
180         while (node) {
181                 struct tag_node *data = rb_entry(node, struct tag_node, node);
182                 int result;
183                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
184                          " node=%p data=%p\n", tag, node, data);
185                 result = tag_compare(tag, data->tag);
186                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
187                          " data.tag=0x%llx (uid=%u) res=%d\n",
188                          tag, data->tag, get_uid_from_tag(data->tag), result);
189                 if (result < 0)
190                         node = node->rb_left;
191                 else if (result > 0)
192                         node = node->rb_right;
193                 else
194                         return data;
195         }
196         return NULL;
197 }
198
199 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
200 {
201         struct rb_node **new = &(root->rb_node), *parent = NULL;
202
203         /* Figure out where to put new node */
204         while (*new) {
205                 struct tag_node *this = rb_entry(*new, struct tag_node,
206                                                  node);
207                 int result = tag_compare(data->tag, this->tag);
208                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
209                          " (uid=%u)\n", __func__,
210                          this->tag,
211                          get_uid_from_tag(this->tag));
212                 parent = *new;
213                 if (result < 0)
214                         new = &((*new)->rb_left);
215                 else if (result > 0)
216                         new = &((*new)->rb_right);
217                 else
218                         BUG();
219         }
220
221         /* Add new node and rebalance tree. */
222         rb_link_node(&data->node, parent, new);
223         rb_insert_color(&data->node, root);
224 }
225
226 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
227 {
228         tag_node_tree_insert(&data->tn, root);
229 }
230
231 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
232 {
233         struct tag_node *node = tag_node_tree_search(root, tag);
234         if (!node)
235                 return NULL;
236         return rb_entry(&node->node, struct tag_stat, tn.node);
237 }
238
239 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
240                                         struct rb_root *root)
241 {
242         tag_node_tree_insert(&data->tn, root);
243 }
244
245 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
246                                                            tag_t tag)
247 {
248         struct tag_node *node = tag_node_tree_search(root, tag);
249         if (!node)
250                 return NULL;
251         return rb_entry(&node->node, struct tag_counter_set, tn.node);
252
253 }
254
255 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
256 {
257         tag_node_tree_insert(&data->tn, root);
258 }
259
260 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
261 {
262         struct tag_node *node = tag_node_tree_search(root, tag);
263         if (!node)
264                 return NULL;
265         return rb_entry(&node->node, struct tag_ref, tn.node);
266 }
267
268 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
269                                              const struct sock *sk)
270 {
271         struct rb_node *node = root->rb_node;
272
273         while (node) {
274                 struct sock_tag *data = rb_entry(node, struct sock_tag,
275                                                  sock_node);
276                 if (sk < data->sk)
277                         node = node->rb_left;
278                 else if (sk > data->sk)
279                         node = node->rb_right;
280                 else
281                         return data;
282         }
283         return NULL;
284 }
285
286 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
287 {
288         struct rb_node **new = &(root->rb_node), *parent = NULL;
289
290         /* Figure out where to put new node */
291         while (*new) {
292                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
293                                                  sock_node);
294                 parent = *new;
295                 if (data->sk < this->sk)
296                         new = &((*new)->rb_left);
297                 else if (data->sk > this->sk)
298                         new = &((*new)->rb_right);
299                 else
300                         BUG();
301         }
302
303         /* Add new node and rebalance tree. */
304         rb_link_node(&data->sock_node, parent, new);
305         rb_insert_color(&data->sock_node, root);
306 }
307
308 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
309 {
310         struct rb_node *node;
311         struct sock_tag *st_entry;
312
313         node = rb_first(st_to_free_tree);
314         while (node) {
315                 st_entry = rb_entry(node, struct sock_tag, sock_node);
316                 node = rb_next(node);
317                 CT_DEBUG("qtaguid: %s(): "
318                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
319                          st_entry->sk,
320                          st_entry->tag,
321                          get_uid_from_tag(st_entry->tag));
322                 rb_erase(&st_entry->sock_node, st_to_free_tree);
323                 sockfd_put(st_entry->socket);
324                 kfree(st_entry);
325         }
326 }
327
328 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
329                                                        const pid_t pid)
330 {
331         struct rb_node *node = root->rb_node;
332
333         while (node) {
334                 struct proc_qtu_data *data = rb_entry(node,
335                                                       struct proc_qtu_data,
336                                                       node);
337                 if (pid < data->pid)
338                         node = node->rb_left;
339                 else if (pid > data->pid)
340                         node = node->rb_right;
341                 else
342                         return data;
343         }
344         return NULL;
345 }
346
347 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
348                                       struct rb_root *root)
349 {
350         struct rb_node **new = &(root->rb_node), *parent = NULL;
351
352         /* Figure out where to put new node */
353         while (*new) {
354                 struct proc_qtu_data *this = rb_entry(*new,
355                                                       struct proc_qtu_data,
356                                                       node);
357                 parent = *new;
358                 if (data->pid < this->pid)
359                         new = &((*new)->rb_left);
360                 else if (data->pid > this->pid)
361                         new = &((*new)->rb_right);
362                 else
363                         BUG();
364         }
365
366         /* Add new node and rebalance tree. */
367         rb_link_node(&data->node, parent, new);
368         rb_insert_color(&data->node, root);
369 }
370
371 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
372                                      struct rb_root *root)
373 {
374         struct rb_node **new = &(root->rb_node), *parent = NULL;
375
376         /* Figure out where to put new node */
377         while (*new) {
378                 struct uid_tag_data *this = rb_entry(*new,
379                                                      struct uid_tag_data,
380                                                      node);
381                 parent = *new;
382                 if (data->uid < this->uid)
383                         new = &((*new)->rb_left);
384                 else if (data->uid > this->uid)
385                         new = &((*new)->rb_right);
386                 else
387                         BUG();
388         }
389
390         /* Add new node and rebalance tree. */
391         rb_link_node(&data->node, parent, new);
392         rb_insert_color(&data->node, root);
393 }
394
395 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
396                                                      uid_t uid)
397 {
398         struct rb_node *node = root->rb_node;
399
400         while (node) {
401                 struct uid_tag_data *data = rb_entry(node,
402                                                      struct uid_tag_data,
403                                                      node);
404                 if (uid < data->uid)
405                         node = node->rb_left;
406                 else if (uid > data->uid)
407                         node = node->rb_right;
408                 else
409                         return data;
410         }
411         return NULL;
412 }
413
414 /*
415  * Allocates a new uid_tag_data struct if needed.
416  * Returns a pointer to the found or allocated uid_tag_data.
417  * Returns a PTR_ERR on failures, and lock is not held.
418  * If found_res is not NULL:
419  *   sets *found_res to true if the entry already existed (not allocated).
420  *   sets *found_res to false if a new entry was allocated.
421  */
422 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
423 {
424         struct uid_tag_data *utd_entry;
425
426         /* Look for top level uid_tag_data for the UID */
427         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
428         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
429
430         if (found_res)
431                 *found_res = utd_entry;
432         if (utd_entry)
433                 return utd_entry;
434
435         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
436         if (!utd_entry) {
437                 pr_err("qtaguid: get_uid_data(%u): "
438                        "tag data alloc failed\n", uid);
439                 return ERR_PTR(-ENOMEM);
440         }
441
442         utd_entry->uid = uid;
443         utd_entry->tag_ref_tree = RB_ROOT;
444         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
445         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
446         return utd_entry;
447 }
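
/*
 * Locking note (inferred from the code, not stated in the original): the
 * GFP_ATOMIC allocation and the unprotected tree insert above imply that
 * callers, e.g. get_tag_ref() below via lookup_tag_ref(), hold
 * uid_tag_data_tree_lock around get_uid_data().
 */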
448
449 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
450 static struct tag_ref *new_tag_ref(tag_t new_tag,
451                                    struct uid_tag_data *utd_entry)
452 {
453         struct tag_ref *tr_entry;
454         int res;
455
456         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
457                 pr_info("qtaguid: new_tag_ref(0x%llx): "
458                         "tag ref alloc quota exceeded. max=%d\n",
459                         new_tag, max_sock_tags);
460                 res = -EMFILE;
461                 goto err_res;
462
463         }
464
465         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
466         if (!tr_entry) {
467                 pr_err("qtaguid: new_tag_ref(0x%llx): "
468                        "tag ref alloc failed\n",
469                        new_tag);
470                 res = -ENOMEM;
471                 goto err_res;
472         }
473         tr_entry->tn.tag = new_tag;
474         /* tr_entry->num_sock_tags  handled by caller */
475         utd_entry->num_active_tags++;
476         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
477         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
478                  " inserted new tag ref %p\n",
479                  new_tag, tr_entry);
480         return tr_entry;
481
482 err_res:
483         return ERR_PTR(res);
484 }
485
486 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
487                                       struct uid_tag_data **utd_res)
488 {
489         struct uid_tag_data *utd_entry;
490         struct tag_ref *tr_entry;
491         bool found_utd;
492         uid_t uid = get_uid_from_tag(full_tag);
493
494         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
495                  full_tag, uid);
496
497         utd_entry = get_uid_data(uid, &found_utd);
498         if (IS_ERR_OR_NULL(utd_entry)) {
499                 if (utd_res)
500                         *utd_res = utd_entry;
501                 return NULL;
502         }
503
504         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
505         if (utd_res)
506                 *utd_res = utd_entry;
507         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
508                  full_tag, utd_entry, tr_entry);
509         return tr_entry;
510 }
511
512 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
513 static struct tag_ref *get_tag_ref(tag_t full_tag,
514                                    struct uid_tag_data **utd_res)
515 {
516         struct uid_tag_data *utd_entry;
517         struct tag_ref *tr_entry;
518
519         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
520                  full_tag);
521         spin_lock_bh(&uid_tag_data_tree_lock);
522         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
523         BUG_ON(IS_ERR_OR_NULL(utd_entry));
524         if (!tr_entry)
525                 tr_entry = new_tag_ref(full_tag, utd_entry);
526
527         spin_unlock_bh(&uid_tag_data_tree_lock);
528         if (utd_res)
529                 *utd_res = utd_entry;
530         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
531                  full_tag, utd_entry, tr_entry);
532         return tr_entry;
533 }
534
535 /* Checks and maybe frees the UID Tag Data entry */
536 static void put_utd_entry(struct uid_tag_data *utd_entry)
537 {
538         /* Are we done with the UID tag data entry? */
539         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
540                 !utd_entry->num_pqd) {
541                 DR_DEBUG("qtaguid: %s(): "
542                          "erase utd_entry=%p uid=%u "
543                          "by pid=%u tgid=%u uid=%u\n", __func__,
544                          utd_entry, utd_entry->uid,
545                          current->pid, current->tgid, current_fsuid());
546                 BUG_ON(utd_entry->num_active_tags);
547                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
548                 kfree(utd_entry);
549         } else {
550                 DR_DEBUG("qtaguid: %s(): "
551                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
552                          __func__, utd_entry, utd_entry->num_active_tags,
553                          utd_entry->num_pqd);
554                 BUG_ON(!(utd_entry->num_active_tags ||
555                          utd_entry->num_pqd));
556         }
557 }
558
559 /*
560  * If no sock_tags are using this tag_ref,
561  * decrements utd_entry->num_active_tags, removes tr_entry
562  * from utd_entry->tag_ref_tree and frees it.
563  */
564 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
565                                         struct uid_tag_data *utd_entry)
566 {
567         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
568                  tr_entry, tr_entry->tn.tag,
569                  get_uid_from_tag(tr_entry->tn.tag));
570         if (!tr_entry->num_sock_tags) {
571                 BUG_ON(!utd_entry->num_active_tags);
572                 utd_entry->num_active_tags--;
573                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
574                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
575                 kfree(tr_entry);
576         }
577 }
578
579 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
580 {
581         struct rb_node *node;
582         struct tag_ref *tr_entry;
583         tag_t acct_tag;
584
585         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
586                  full_tag, get_uid_from_tag(full_tag));
587         acct_tag = get_atag_from_tag(full_tag);
588         node = rb_first(&utd_entry->tag_ref_tree);
589         while (node) {
590                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
591                 node = rb_next(node);
592                 if (!acct_tag || tr_entry->tn.tag == full_tag)
593                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
594         }
595 }
596
597 static int read_proc_u64(struct file *file, char __user *buf,
598                          size_t size, loff_t *ppos)
599 {
600         uint64_t *valuep = PDE_DATA(file_inode(file));
601         char tmp[24];
602         size_t tmp_size;
603
604         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
605         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
606 }
607
608 static int read_proc_bool(struct file *file, char __user *buf,
609                           size_t size, loff_t *ppos)
610 {
611         bool *valuep = PDE_DATA(file_inode(file));
612         char tmp[24];
613         size_t tmp_size;
614
615         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
616         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
617 }
618
619 static int get_active_counter_set(tag_t tag)
620 {
621         int active_set = 0;
622         struct tag_counter_set *tcs;
623
624         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
625                  " (uid=%u)\n",
626                  tag, get_uid_from_tag(tag));
627         /* For now we only handle UID tags for active sets */
628         tag = get_utag_from_tag(tag);
629         spin_lock_bh(&tag_counter_set_list_lock);
630         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
631         if (tcs)
632                 active_set = tcs->active_set;
633         spin_unlock_bh(&tag_counter_set_list_lock);
634         return active_set;
635 }
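
/*
 * Counter sets are kept per uid in tag_counter_set_tree (the active set is
 * presumably switched from userspace through the ctrl file); changing the
 * active set redirects subsequent accounting for all of that uid's tags into
 * a different bpc[set] slot without clearing the previous one.
 */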
636
637 /*
638  * Find the entry for tracking the specified interface.
639  * Caller must hold iface_stat_list_lock
640  */
641 static struct iface_stat *get_iface_entry(const char *ifname)
642 {
643         struct iface_stat *iface_entry;
644
645         /* Find the entry for tracking the specified tag within the interface */
646         if (ifname == NULL) {
647                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
648                 return NULL;
649         }
650
651         /* Iterate over interfaces */
652         list_for_each_entry(iface_entry, &iface_stat_list, list) {
653                 if (!strcmp(ifname, iface_entry->ifname))
654                         goto done;
655         }
656         iface_entry = NULL;
657 done:
658         return iface_entry;
659 }
660
661 /* This is for fmt2 only */
662 static void pp_iface_stat_header(struct seq_file *m)
663 {
664         seq_puts(m,
665                  "ifname "
666                  "total_skb_rx_bytes total_skb_rx_packets "
667                  "total_skb_tx_bytes total_skb_tx_packets "
668                  "rx_tcp_bytes rx_tcp_packets "
669                  "rx_udp_bytes rx_udp_packets "
670                  "rx_other_bytes rx_other_packets "
671                  "tx_tcp_bytes tx_tcp_packets "
672                  "tx_udp_bytes tx_udp_packets "
673                  "tx_other_bytes tx_other_packets\n"
674         );
675 }
676
677 static void pp_iface_stat_line(struct seq_file *m,
678                                struct iface_stat *iface_entry)
679 {
680         struct data_counters *cnts;
681         int cnt_set = 0;   /* We only use one set for the device */
682         cnts = &iface_entry->totals_via_skb;
683         seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
684                    "%llu %llu %llu %llu %llu %llu %llu %llu\n",
685                    iface_entry->ifname,
686                    dc_sum_bytes(cnts, cnt_set, IFS_RX),
687                    dc_sum_packets(cnts, cnt_set, IFS_RX),
688                    dc_sum_bytes(cnts, cnt_set, IFS_TX),
689                    dc_sum_packets(cnts, cnt_set, IFS_TX),
690                    cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
691                    cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
692                    cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
693                    cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
694                    cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
695                    cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
696                    cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
697                    cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
698                    cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
699                    cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
700                    cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
701                    cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
702 }
703
704 struct proc_iface_stat_fmt_info {
705         int fmt;
706 };
707
708 static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
709 {
710         struct proc_iface_stat_fmt_info *p = m->private;
711         loff_t n = *pos;
712
713         /*
714          * This lock will prevent iface_stat_update() from changing active,
715          * and in turn prevent an interface from unregistering itself.
716          */
717         spin_lock_bh(&iface_stat_list_lock);
718
719         if (unlikely(module_passive))
720                 return NULL;
721
722         if (!n && p->fmt == 2)
723                 pp_iface_stat_header(m);
724
725         return seq_list_start(&iface_stat_list, n);
726 }
727
728 static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
729 {
730         return seq_list_next(p, &iface_stat_list, pos);
731 }
732
733 static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
734 {
735         spin_unlock_bh(&iface_stat_list_lock);
736 }
737
738 static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
739 {
740         struct proc_iface_stat_fmt_info *p = m->private;
741         struct iface_stat *iface_entry;
742         struct rtnl_link_stats64 dev_stats, *stats;
743         struct rtnl_link_stats64 no_dev_stats = {0};
744
745
746         CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
747                  current->pid, current->tgid, current_fsuid());
748
749         iface_entry = list_entry(v, struct iface_stat, list);
750
751         if (iface_entry->active) {
752                 stats = dev_get_stats(iface_entry->net_dev,
753                                       &dev_stats);
754         } else {
755                 stats = &no_dev_stats;
756         }
757         /*
758          * If the meaning of the data changes, then update the fmtX
759          * string.
760          */
761         if (p->fmt == 1) {
762                 seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
763                            iface_entry->ifname,
764                            iface_entry->active,
765                            iface_entry->totals_via_dev[IFS_RX].bytes,
766                            iface_entry->totals_via_dev[IFS_RX].packets,
767                            iface_entry->totals_via_dev[IFS_TX].bytes,
768                            iface_entry->totals_via_dev[IFS_TX].packets,
769                            stats->rx_bytes, stats->rx_packets,
770                            stats->tx_bytes, stats->tx_packets
771                            );
772         } else {
773                 pp_iface_stat_line(m, iface_entry);
774         }
775         return 0;
776 }
777
778 static const struct file_operations read_u64_fops = {
779         .read           = read_proc_u64,
780         .llseek         = default_llseek,
781 };
782
783 static const struct file_operations read_bool_fops = {
784         .read           = read_proc_bool,
785         .llseek         = default_llseek,
786 };
787
788 static void iface_create_proc_worker(struct work_struct *work)
789 {
790         struct proc_dir_entry *proc_entry;
791         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
792                                                    iface_work);
793         struct iface_stat *new_iface  = isw->iface_entry;
794
795         /* iface_entries are not deleted, so safe to manipulate. */
796         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
797         if (IS_ERR_OR_NULL(proc_entry)) {
798                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
799                 kfree(isw);
800                 return;
801         }
802
803         new_iface->proc_ptr = proc_entry;
804
805         proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
806                          &read_u64_fops,
807                          &new_iface->totals_via_dev[IFS_TX].bytes);
808         proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
809                          &read_u64_fops,
810                          &new_iface->totals_via_dev[IFS_RX].bytes);
811         proc_create_data("tx_packets", proc_iface_perms, proc_entry,
812                          &read_u64_fops,
813                          &new_iface->totals_via_dev[IFS_TX].packets);
814         proc_create_data("rx_packets", proc_iface_perms, proc_entry,
815                          &read_u64_fops,
816                          &new_iface->totals_via_dev[IFS_RX].packets);
817         proc_create_data("active", proc_iface_perms, proc_entry,
818                          &read_bool_fops, &new_iface->active);
819
820         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
821                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
822         kfree(isw);
823 }
824
825 /*
826  * Sets the entry's active state and updates its
827  * net_dev pointer accordingly.
828  */
829 static void _iface_stat_set_active(struct iface_stat *entry,
830                                    struct net_device *net_dev,
831                                    bool activate)
832 {
833         if (activate) {
834                 entry->net_dev = net_dev;
835                 entry->active = true;
836                 IF_DEBUG("qtaguid: %s(%s): "
837                          "enable tracking. rfcnt=%d\n", __func__,
838                          entry->ifname,
839                          __this_cpu_read(*net_dev->pcpu_refcnt));
840         } else {
841                 entry->active = false;
842                 entry->net_dev = NULL;
843                 IF_DEBUG("qtaguid: %s(%s): "
844                          "disable tracking. rfcnt=%d\n", __func__,
845                          entry->ifname,
846                          __this_cpu_read(*net_dev->pcpu_refcnt));
847
848         }
849 }
850
851 /* Caller must hold iface_stat_list_lock */
852 static struct iface_stat *iface_alloc(struct net_device *net_dev)
853 {
854         struct iface_stat *new_iface;
855         struct iface_stat_work *isw;
856
857         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
858         if (new_iface == NULL) {
859                 pr_err("qtaguid: iface_stat: create(%s): "
860                        "iface_stat alloc failed\n", net_dev->name);
861                 return NULL;
862         }
863         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
864         if (new_iface->ifname == NULL) {
865                 pr_err("qtaguid: iface_stat: create(%s): "
866                        "ifname alloc failed\n", net_dev->name);
867                 kfree(new_iface);
868                 return NULL;
869         }
870         spin_lock_init(&new_iface->tag_stat_list_lock);
871         new_iface->tag_stat_tree = RB_ROOT;
872         _iface_stat_set_active(new_iface, net_dev, true);
873
874         /*
875          * ipv6 notifier chains are atomic :(
876          * No create_proc_read_entry() for you!
877          */
878         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
879         if (!isw) {
880                 pr_err("qtaguid: iface_stat: create(%s): "
881                        "work alloc failed\n", new_iface->ifname);
882                 _iface_stat_set_active(new_iface, net_dev, false);
883                 kfree(new_iface->ifname);
884                 kfree(new_iface);
885                 return NULL;
886         }
887         isw->iface_entry = new_iface;
888         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
889         schedule_work(&isw->iface_work);
890         list_add(&new_iface->list, &iface_stat_list);
891         return new_iface;
892 }
893
894 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
895                                                struct iface_stat *iface)
896 {
897         struct rtnl_link_stats64 dev_stats, *stats;
898         bool stats_rewound;
899
900         stats = dev_get_stats(net_dev, &dev_stats);
901         /* No empty packets */
902         stats_rewound =
903                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
904                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
905
906         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
907                  "bytes rx/tx=%llu/%llu "
908                  "active=%d last_known=%d "
909                  "stats_rewound=%d\n", __func__,
910                  net_dev ? net_dev->name : "?",
911                  iface, net_dev,
912                  stats->rx_bytes, stats->tx_bytes,
913                  iface->active, iface->last_known_valid, stats_rewound);
914
915         if (iface->active && iface->last_known_valid && stats_rewound) {
916                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
917                              "iface reset its stats unexpectedly\n", __func__,
918                              net_dev->name);
919
920                 iface->totals_via_dev[IFS_TX].bytes +=
921                         iface->last_known[IFS_TX].bytes;
922                 iface->totals_via_dev[IFS_TX].packets +=
923                         iface->last_known[IFS_TX].packets;
924                 iface->totals_via_dev[IFS_RX].bytes +=
925                         iface->last_known[IFS_RX].bytes;
926                 iface->totals_via_dev[IFS_RX].packets +=
927                         iface->last_known[IFS_RX].packets;
928                 iface->last_known_valid = false;
929                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
930                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
931                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
932                          iface->last_known[IFS_TX].bytes);
933         }
934 }
935
936 /*
937  * Create a new entry for tracking the specified interface.
938  * Do nothing if the entry already exists.
939  * Called when an interface is configured with a valid IP address.
940  */
941 static void iface_stat_create(struct net_device *net_dev,
942                               struct in_ifaddr *ifa)
943 {
944         struct in_device *in_dev = NULL;
945         const char *ifname;
946         struct iface_stat *entry;
947         __be32 ipaddr = 0;
948         struct iface_stat *new_iface;
949
950         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
951                  net_dev ? net_dev->name : "?",
952                  ifa, net_dev);
953         if (!net_dev) {
954                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
955                 return;
956         }
957
958         ifname = net_dev->name;
959         if (!ifa) {
960                 in_dev = in_dev_get(net_dev);
961                 if (!in_dev) {
962                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
963                                ifname);
964                         return;
965                 }
966                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
967                          ifname, in_dev);
968                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
969                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
970                                  "ifa=%p ifa_label=%s\n",
971                                  ifname, ifa,
972                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
973                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
974                                 break;
975                 }
976         }
977
978         if (!ifa) {
979                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
980                          ifname);
981                 goto done_put;
982         }
983         ipaddr = ifa->ifa_local;
984
985         spin_lock_bh(&iface_stat_list_lock);
986         entry = get_iface_entry(ifname);
987         if (entry != NULL) {
988                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
989                          ifname, entry);
990                 iface_check_stats_reset_and_adjust(net_dev, entry);
991                 _iface_stat_set_active(entry, net_dev, true);
992                 IF_DEBUG("qtaguid: %s(%s): "
993                          "tracking now %d on ip=%pI4\n", __func__,
994                          entry->ifname, true, &ipaddr);
995                 goto done_unlock_put;
996         }
997
998         new_iface = iface_alloc(net_dev);
999         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1000                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1001 done_unlock_put:
1002         spin_unlock_bh(&iface_stat_list_lock);
1003 done_put:
1004         if (in_dev)
1005                 in_dev_put(in_dev);
1006 }
1007
1008 static void iface_stat_create_ipv6(struct net_device *net_dev,
1009                                    struct inet6_ifaddr *ifa)
1010 {
1011         struct in_device *in_dev;
1012         const char *ifname;
1013         struct iface_stat *entry;
1014         struct iface_stat *new_iface;
1015         int addr_type;
1016
1017         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1018                  ifa, net_dev, net_dev ? net_dev->name : "");
1019         if (!net_dev) {
1020                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1021                 return;
1022         }
1023         ifname = net_dev->name;
1024
1025         in_dev = in_dev_get(net_dev);
1026         if (!in_dev) {
1027                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1028                        ifname);
1029                 return;
1030         }
1031
1032         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1033                  ifname, in_dev);
1034
1035         if (!ifa) {
1036                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1037                          ifname);
1038                 goto done_put;
1039         }
1040         addr_type = ipv6_addr_type(&ifa->addr);
1041
1042         spin_lock_bh(&iface_stat_list_lock);
1043         entry = get_iface_entry(ifname);
1044         if (entry != NULL) {
1045                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1046                          ifname, entry);
1047                 iface_check_stats_reset_and_adjust(net_dev, entry);
1048                 _iface_stat_set_active(entry, net_dev, true);
1049                 IF_DEBUG("qtaguid: %s(%s): "
1050                          "tracking now %d on ip=%pI6c\n", __func__,
1051                          entry->ifname, true, &ifa->addr);
1052                 goto done_unlock_put;
1053         }
1054
1055         new_iface = iface_alloc(net_dev);
1056         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1057                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1058
1059 done_unlock_put:
1060         spin_unlock_bh(&iface_stat_list_lock);
1061 done_put:
1062         in_dev_put(in_dev);
1063 }
1064
1065 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1066 {
1067         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1068         return sock_tag_tree_search(&sock_tag_tree, sk);
1069 }
1070
1071 static struct sock_tag *get_sock_stat(const struct sock *sk)
1072 {
1073         struct sock_tag *sock_tag_entry;
1074         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1075         if (!sk)
1076                 return NULL;
1077         spin_lock_bh(&sock_tag_list_lock);
1078         sock_tag_entry = get_sock_stat_nl(sk);
1079         spin_unlock_bh(&sock_tag_list_lock);
1080         return sock_tag_entry;
1081 }
1082
1083 static int ipx_proto(const struct sk_buff *skb,
1084                      struct xt_action_param *par)
1085 {
1086         int thoff = 0, tproto;
1087
1088         switch (par->family) {
1089         case NFPROTO_IPV6:
1090                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1091                 if (tproto < 0)
1092                         MT_DEBUG("%s(): transport header not found in ipv6"
1093                                  " skb=%p\n", __func__, skb);
1094                 break;
1095         case NFPROTO_IPV4:
1096                 tproto = ip_hdr(skb)->protocol;
1097                 break;
1098         default:
1099                 tproto = IPPROTO_RAW;
1100         }
1101         return tproto;
1102 }
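
/*
 * For families other than IPv4/IPv6 the protocol is reported as IPPROTO_RAW,
 * which data_counters_update() below accounts under IFS_PROTO_OTHER.
 */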
1103
1104 static void
1105 data_counters_update(struct data_counters *dc, int set,
1106                      enum ifs_tx_rx direction, int proto, int bytes)
1107 {
1108         switch (proto) {
1109         case IPPROTO_TCP:
1110                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1111                 break;
1112         case IPPROTO_UDP:
1113                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1114                 break;
1115         case IPPROTO_IP:
1116         default:
1117                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1118                                     1);
1119                 break;
1120         }
1121 }
1122
1123 /*
1124  * Update stats for the specified interface. Do nothing if the entry
1125  * does not exist (when a device was never configured with an IP address).
1126  * Called when a device is being unregistered.
1127  */
1128 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1129 {
1130         struct rtnl_link_stats64 dev_stats, *stats;
1131         struct iface_stat *entry;
1132
1133         stats = dev_get_stats(net_dev, &dev_stats);
1134         spin_lock_bh(&iface_stat_list_lock);
1135         entry = get_iface_entry(net_dev->name);
1136         if (entry == NULL) {
1137                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1138                          net_dev->name);
1139                 spin_unlock_bh(&iface_stat_list_lock);
1140                 return;
1141         }
1142
1143         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1144                  net_dev->name, entry);
1145         if (!entry->active) {
1146                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1147                          net_dev->name);
1148                 spin_unlock_bh(&iface_stat_list_lock);
1149                 return;
1150         }
1151
1152         if (stash_only) {
1153                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1154                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1155                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1156                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1157                 entry->last_known_valid = true;
1158                 IF_DEBUG("qtaguid: %s(%s): "
1159                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1160                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1161                 spin_unlock_bh(&iface_stat_list_lock);
1162                 return;
1163         }
1164         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1165         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1166         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1167         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1168         /* We don't need the last_known[] anymore */
1169         entry->last_known_valid = false;
1170         _iface_stat_set_active(entry, net_dev, false);
1171         IF_DEBUG("qtaguid: %s(%s): "
1172                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1173                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1174         spin_unlock_bh(&iface_stat_list_lock);
1175 }
1176
1177 /*
1178  * Update stats for the specified interface from the skb.
1179  * Do nothing if the entry does not exist (when a device was never
1180  * configured with an IP address).
1181  * Called on each sk.
1182  */
1183 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1184                                        struct xt_action_param *par)
1185 {
1186         struct iface_stat *entry;
1187         const struct net_device *el_dev;
1188         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1189         int bytes = skb->len;
1190         int proto;
1191
1192         if (!skb->dev) {
1193                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1194                 el_dev = par->in ? : par->out;
1195         } else {
1196                 const struct net_device *other_dev;
1197                 el_dev = skb->dev;
1198                 other_dev = par->in ? : par->out;
1199                 if (el_dev != other_dev) {
1200                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1201                                  "par->(in/out)=%p %s\n",
1202                                  par->hooknum, el_dev, el_dev->name, other_dev,
1203                                  other_dev->name);
1204                 }
1205         }
1206
1207         if (unlikely(!el_dev)) {
1208                 pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
1209                                    par->hooknum, __func__);
1210                 BUG();
1211         } else if (unlikely(!el_dev->name)) {
1212                 pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
1213                                    par->hooknum, __func__);
1214                 BUG();
1215         } else {
1216                 proto = ipx_proto(skb, par);
1217                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1218                          par->hooknum, el_dev->name, el_dev->type,
1219                          par->family, proto);
1220         }
1221
1222         spin_lock_bh(&iface_stat_list_lock);
1223         entry = get_iface_entry(el_dev->name);
1224         if (entry == NULL) {
1225                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1226                          __func__, el_dev->name);
1227                 spin_unlock_bh(&iface_stat_list_lock);
1228                 return;
1229         }
1230
1231         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1232                  el_dev->name, entry);
1233
1234         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1235                              bytes);
1236         spin_unlock_bh(&iface_stat_list_lock);
1237 }
1238
1239 static void tag_stat_update(struct tag_stat *tag_entry,
1240                         enum ifs_tx_rx direction, int proto, int bytes)
1241 {
1242         int active_set;
1243         active_set = get_active_counter_set(tag_entry->tn.tag);
1244         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1245                  "dir=%d proto=%d bytes=%d)\n",
1246                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1247                  active_set, direction, proto, bytes);
1248         data_counters_update(&tag_entry->counters, active_set, direction,
1249                              proto, bytes);
1250         if (tag_entry->parent_counters)
1251                 data_counters_update(tag_entry->parent_counters, active_set,
1252                                      direction, proto, bytes);
1253 }
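
/*
 * parent_counters links a child {acct_tag, uid_tag} entry to the counters of
 * its {0, uid_tag} parent (set up in if_tag_stat_update() below), so tagged
 * traffic is accounted against both the specific tag and the uid total.
 */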
1254
1255 /*
1256  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1257  * the interface.
1258  * iface_entry->tag_stat_list_lock should be held.
1259  */
1260 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1261                                            tag_t tag)
1262 {
1263         struct tag_stat *new_tag_stat_entry = NULL;
1264         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1265                  " (uid=%u)\n", __func__,
1266                  iface_entry, tag, get_uid_from_tag(tag));
1267         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1268         if (!new_tag_stat_entry) {
1269                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1270                 goto done;
1271         }
1272         new_tag_stat_entry->tn.tag = tag;
1273         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1274 done:
1275         return new_tag_stat_entry;
1276 }
1277
1278 static void if_tag_stat_update(const char *ifname, uid_t uid,
1279                                const struct sock *sk, enum ifs_tx_rx direction,
1280                                int proto, int bytes)
1281 {
1282         struct tag_stat *tag_stat_entry;
1283         tag_t tag, acct_tag;
1284         tag_t uid_tag;
1285         struct data_counters *uid_tag_counters;
1286         struct sock_tag *sock_tag_entry;
1287         struct iface_stat *iface_entry;
1288         struct tag_stat *new_tag_stat = NULL;
1289         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1290                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1291                  ifname, uid, sk, direction, proto, bytes);
1292
1293
1294         iface_entry = get_iface_entry(ifname);
1295         if (!iface_entry) {
1296                 pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
1297                                    "%s not found\n", ifname);
1298                 return;
1299         }
1300         /* It is ok to process data when an iface_entry is inactive */
1301
1302         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1303                  ifname, iface_entry);
1304
1305         /*
1306          * Look for a tagged sock.
1307          * It will have an acct_uid.
1308          */
1309         sock_tag_entry = get_sock_stat(sk);
1310         if (sock_tag_entry) {
1311                 tag = sock_tag_entry->tag;
1312                 acct_tag = get_atag_from_tag(tag);
1313                 uid_tag = get_utag_from_tag(tag);
1314         } else {
1315                 acct_tag = make_atag_from_value(0);
1316                 tag = combine_atag_with_uid(acct_tag, uid);
1317                 uid_tag = make_tag_from_uid(uid);
1318         }
1319         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1320                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1321                  tag, get_uid_from_tag(tag), iface_entry);
1322         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1323         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1324
1325         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1326                                               tag);
1327         if (tag_stat_entry) {
1328                 /*
1329                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1330                  * {0, uid_tag} will also get updated.
1331                  */
1332                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1333                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1334                 return;
1335         }
1336
1337         /* Loop over tag list under this interface for {0,uid_tag} */
1338         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1339                                               uid_tag);
1340         if (!tag_stat_entry) {
1341                 /* Here: the base uid_tag did not exist */
1342                 /*
1343                  * No parent counters. So
1344                  *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1345                  */
1346                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1347                 if (!new_tag_stat)
1348                         goto unlock;
1349                 uid_tag_counters = &new_tag_stat->counters;
1350         } else {
1351                 uid_tag_counters = &tag_stat_entry->counters;
1352         }
1353
1354         if (acct_tag) {
1355                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1356                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1357                 if (!new_tag_stat)
1358                         goto unlock;
1359                 new_tag_stat->parent_counters = uid_tag_counters;
1360         } else {
1361                 /*
1362                  * For new_tag_stat to be still NULL here would require:
1363                  *  {0, uid_tag} exists
1364                  *  and {acct_tag, uid_tag} doesn't exist
1365                  *  AND acct_tag == 0.
1366                  * Impossible. This reassures us that new_tag_stat
1367                  * below will always be assigned.
1368                  */
1369                 BUG_ON(!new_tag_stat);
1370         }
1371         tag_stat_update(new_tag_stat, direction, proto, bytes);
1372 unlock:
1373         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1374 }
1375
1376 static int iface_netdev_event_handler(struct notifier_block *nb,
1377                                       unsigned long event, void *ptr) {
1378         struct net_device *dev = ptr;
1379
1380         if (unlikely(module_passive))
1381                 return NOTIFY_DONE;
1382
1383         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1384                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1385                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1386
1387         switch (event) {
1388         case NETDEV_UP:
1389                 iface_stat_create(dev, NULL);
1390                 atomic64_inc(&qtu_events.iface_events);
1391                 break;
1392         case NETDEV_DOWN:
1393         case NETDEV_UNREGISTER:
1394                 iface_stat_update(dev, event == NETDEV_DOWN);
1395                 atomic64_inc(&qtu_events.iface_events);
1396                 break;
1397         }
1398         return NOTIFY_DONE;
1399 }
1400
1401 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1402                                          unsigned long event, void *ptr)
1403 {
1404         struct inet6_ifaddr *ifa = ptr;
1405         struct net_device *dev;
1406
1407         if (unlikely(module_passive))
1408                 return NOTIFY_DONE;
1409
1410         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1411                  "ev=0x%lx/%s ifa=%p\n",
1412                  event, netdev_evt_str(event), ifa);
1413
1414         switch (event) {
1415         case NETDEV_UP:
1416                 BUG_ON(!ifa || !ifa->idev);
1417                 dev = (struct net_device *)ifa->idev->dev;
1418                 iface_stat_create_ipv6(dev, ifa);
1419                 atomic64_inc(&qtu_events.iface_events);
1420                 break;
1421         case NETDEV_DOWN:
1422         case NETDEV_UNREGISTER:
1423                 BUG_ON(!ifa || !ifa->idev);
1424                 dev = (struct net_device *)ifa->idev->dev;
1425                 iface_stat_update(dev, event == NETDEV_DOWN);
1426                 atomic64_inc(&qtu_events.iface_events);
1427                 break;
1428         }
1429         return NOTIFY_DONE;
1430 }
1431
1432 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1433                                         unsigned long event, void *ptr)
1434 {
1435         struct in_ifaddr *ifa = ptr;
1436         struct net_device *dev;
1437
1438         if (unlikely(module_passive))
1439                 return NOTIFY_DONE;
1440
1441         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1442                  "ev=0x%lx/%s ifa=%p\n",
1443                  event, netdev_evt_str(event), ifa);
1444
1445         switch (event) {
1446         case NETDEV_UP:
1447                 BUG_ON(!ifa || !ifa->ifa_dev);
1448                 dev = ifa->ifa_dev->dev;
1449                 iface_stat_create(dev, ifa);
1450                 atomic64_inc(&qtu_events.iface_events);
1451                 break;
1452         case NETDEV_DOWN:
1453         case NETDEV_UNREGISTER:
1454                 BUG_ON(!ifa || !ifa->ifa_dev);
1455                 dev = ifa->ifa_dev->dev;
1456                 iface_stat_update(dev, event == NETDEV_DOWN);
1457                 atomic64_inc(&qtu_events.iface_events);
1458                 break;
1459         }
1460         return NOTIFY_DONE;
1461 }
1462
1463 static struct notifier_block iface_netdev_notifier_blk = {
1464         .notifier_call = iface_netdev_event_handler,
1465 };
1466
1467 static struct notifier_block iface_inetaddr_notifier_blk = {
1468         .notifier_call = iface_inetaddr_event_handler,
1469 };
1470
1471 static struct notifier_block iface_inet6addr_notifier_blk = {
1472         .notifier_call = iface_inet6addr_event_handler,
1473 };
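
/*
 * The notifier blocks above are registered with the netdevice, inetaddr and
 * inet6addr chains in iface_stat_init() below; each handler bumps
 * qtu_events.iface_events and refreshes the per-interface stats on
 * UP/DOWN/UNREGISTER.
 */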
1474
1475 static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
1476         .start  = iface_stat_fmt_proc_start,
1477         .next   = iface_stat_fmt_proc_next,
1478         .stop   = iface_stat_fmt_proc_stop,
1479         .show   = iface_stat_fmt_proc_show,
1480 };
1481
1482 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1483 {
1484         struct proc_iface_stat_fmt_info *s;
1485
1486         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1487                         sizeof(struct proc_iface_stat_fmt_info));
1488         if (!s)
1489                 return -ENOMEM;
1490
1491         s->fmt = (uintptr_t)PDE_DATA(inode);
1492         return 0;
1493 }
1494
1495 static const struct file_operations proc_iface_stat_fmt_fops = {
1496         .open           = proc_iface_stat_fmt_open,
1497         .read           = seq_read,
1498         .llseek         = seq_lseek,
1499         .release        = seq_release,
1500 };
1501
1502 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1503 {
1504         int err;
1505
1506         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1507         if (!iface_stat_procdir) {
1508                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1509                 err = -1;
1510                 goto err;
1511         }
1512
1513         iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1514                                                    proc_iface_perms,
1515                                                    parent_procdir,
1516                                                    &proc_iface_stat_fmt_fops,
1517                                                    (void *)1 /* fmt1 */);
1518         if (!iface_stat_all_procfile) {
1519                 pr_err("qtaguid: iface_stat: init "
1520                        "failed to create iface_stat_all proc entry\n");
1521                 err = -1;
1522                 goto err_zap_entry;
1523         }
1524
1525         iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1526                                                    proc_iface_perms,
1527                                                    parent_procdir,
1528                                                    &proc_iface_stat_fmt_fops,
1529                                                    (void *)2 /* fmt2 */);
1530         if (!iface_stat_fmt_procfile) {
1531                 pr_err("qtaguid: iface_stat: init "
1532                        "failed to create iface_stat_fmt proc entry\n");
1533                 err = -1;
1534                 goto err_zap_all_stats_entry;
1535         }
1536
1537
1538         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1539         if (err) {
1540                 pr_err("qtaguid: iface_stat: init "
1541                        "failed to register dev event handler\n");
1542                 goto err_zap_all_stats_entries;
1543         }
1544         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1545         if (err) {
1546                 pr_err("qtaguid: iface_stat: init "
1547                        "failed to register ipv4 dev event handler\n");
1548                 goto err_unreg_nd;
1549         }
1550
1551         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1552         if (err) {
1553                 pr_err("qtaguid: iface_stat: init "
1554                        "failed to register ipv6 dev event handler\n");
1555                 goto err_unreg_ip4_addr;
1556         }
1557         return 0;
1558
1559 err_unreg_ip4_addr:
1560         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1561 err_unreg_nd:
1562         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1563 err_zap_all_stats_entries:
1564         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1565 err_zap_all_stats_entry:
1566         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1567 err_zap_entry:
1568         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1569 err:
1570         return err;
1571 }
1572
1573 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1574                                     struct xt_action_param *par)
1575 {
1576         struct sock *sk;
1577         unsigned int hook_mask = (1 << par->hooknum);
1578
1579         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1580                  par->hooknum, par->family);
1581
1582         /*
1583          * Let's not abuse the xt_socket_get*_sk(), or else it will
1584          * return garbage SKs.
1585          */
1586         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1587                 return NULL;
1588
1589         switch (par->family) {
1590         case NFPROTO_IPV6:
1591                 sk = xt_socket_get6_sk(skb, par);
1592                 break;
1593         case NFPROTO_IPV4:
1594                 sk = xt_socket_get4_sk(skb, par);
1595                 break;
1596         default:
1597                 return NULL;
1598         }
1599
1600         /*
1601          * There seem to be issues with the file ptr for TCP_TIME_WAIT SKs.
1602          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1603          * Not fixed in 3.0-r3 :(
1604          */
1605         if (sk) {
1606                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1607                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1608                 if (sk->sk_state == TCP_TIME_WAIT) {
1609                         xt_socket_put_sk(sk);
1610                         sk = NULL;
1611                 }
1612         }
1613         return sk;
1614 }
1615
1616 static void account_for_uid(const struct sk_buff *skb,
1617                             const struct sock *alternate_sk, uid_t uid,
1618                             struct xt_action_param *par)
1619 {
1620         const struct net_device *el_dev;
1621
1622         if (!skb->dev) {
1623                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1624                 el_dev = par->in ? : par->out;
1625         } else {
1626                 const struct net_device *other_dev;
1627                 el_dev = skb->dev;
1628                 other_dev = par->in ? : par->out;
1629                 if (el_dev != other_dev) {
1630                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1631                                 "par->(in/out)=%p %s\n",
1632                                 par->hooknum, el_dev, el_dev->name, other_dev,
1633                                 other_dev->name);
1634                 }
1635         }
1636
1637         if (unlikely(!el_dev)) {
1638                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1639         } else if (unlikely(!el_dev->name)) {
1640                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1641         } else {
1642                 int proto = ipx_proto(skb, par);
1643                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1644                          par->hooknum, el_dev->name, el_dev->type,
1645                          par->family, proto);
1646
1647                 if_tag_stat_update(el_dev->name, uid,
1648                                 skb->sk ? skb->sk : alternate_sk,
1649                                 par->in ? IFS_RX : IFS_TX,
1650                                 proto, skb->len);
1651         }
1652 }
1653
1654 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1655 {
1656         const struct xt_qtaguid_match_info *info = par->matchinfo;
1657         const struct file *filp;
1658         bool got_sock = false;
1659         struct sock *sk;
1660         uid_t sock_uid;
1661         bool res;
1662
1663         if (unlikely(module_passive))
1664                 return (info->match ^ info->invert) == 0;
1665
1666         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1667                  par->hooknum, skb, par->in, par->out, par->family);
1668
1669         atomic64_inc(&qtu_events.match_calls);
1670         if (skb == NULL) {
1671                 res = (info->match ^ info->invert) == 0;
1672                 goto ret_res;
1673         }
1674
1675         switch (par->hooknum) {
1676         case NF_INET_PRE_ROUTING:
1677         case NF_INET_POST_ROUTING:
1678                 atomic64_inc(&qtu_events.match_calls_prepost);
1679                 iface_stat_update_from_skb(skb, par);
1680                 /*
1681                  * We are done in pre/post. The skb will get processed
1682                  * further later.
1683                  */
1684                 res = (info->match ^ info->invert);
1685                 goto ret_res;
1686                 break;
1687         /* default: Fall through and do UID-related work */
1688         }
1689
1690         sk = skb->sk;
1691         if (sk == NULL) {
1692                 /*
1693                  * A missing sk->sk_socket happens when packets are in-flight
1694                  * and the matching socket is already closed and gone.
1695                  */
1696                 sk = qtaguid_find_sk(skb, par);
1697                 /*
1698                  * If we got the socket from the find_sk(), we will need to put
1699                  * it back, as nf_tproxy_get_sock_v4() got it.
1700                  */
1701                 got_sock = sk;
1702                 if (sk)
1703                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1704                 else
1705                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1706         } else {
1707                 atomic64_inc(&qtu_events.match_found_sk);
1708         }
1709         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1710                  par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1711         if (sk != NULL) {
1712                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1713                         par->hooknum, sk, sk->sk_socket,
1714                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1715                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1716                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1717                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1718         }
1719
1720         if (sk == NULL || sk->sk_socket == NULL) {
1721                 /*
1722                  * Here, the qtaguid_find_sk() using connection tracking
1723                  * couldn't find the owner, so for now we just count them
1724                  * against the system.
1725                  */
1726                 /*
1727                  * TODO: unhack how to force just accounting.
1728                  * For now we only do iface stats when the uid-owner is not
1729                  * requested.
1730                  */
1731                 if (!(info->match & XT_QTAGUID_UID))
1732                         account_for_uid(skb, sk, 0, par);
1733                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1734                         par->hooknum,
1735                         sk ? sk->sk_socket : NULL);
1736                 res = (info->match ^ info->invert) == 0;
1737                 atomic64_inc(&qtu_events.match_no_sk);
1738                 goto put_sock_ret_res;
1739         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1740                 res = false;
1741                 goto put_sock_ret_res;
1742         }
1743         filp = sk->sk_socket->file;
1744         if (filp == NULL) {
1745                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1746                 account_for_uid(skb, sk, 0, par);
1747                 res = ((info->match ^ info->invert) &
1748                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1749                 atomic64_inc(&qtu_events.match_no_sk_file);
1750                 goto put_sock_ret_res;
1751         }
1752         sock_uid = filp->f_cred->fsuid;
1753         /*
1754          * TODO: unhack how to force just accounting.
1755          * For now we only do iface stats when the uid-owner is not requested
1756          */
1757         if (!(info->match & XT_QTAGUID_UID))
1758                 account_for_uid(skb, sk, sock_uid, par);
1759
1760         /*
1761          * The following two tests fail the match when:
1762          *    id not in range AND no inverted condition requested
1763          * or id     in range AND    inverted condition requested
1764          * Thus (!a && b) || (a && !b) == a ^ b
1765          */
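        /*
         * Concretely, for the uid test with a = "fsuid in [uid_min, uid_max]"
         * and b = "no inversion requested":
         *   a=0 b=1: not in range, plain match wanted -> fail
         *   a=1 b=0: in range, inversion wanted       -> fail
         *   a==b:                                        pass
         * which is the (range_test ^ !(info->invert & flag)) form used below.
         */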
1766         if (info->match & XT_QTAGUID_UID)
1767                 if ((filp->f_cred->fsuid >= info->uid_min &&
1768                      filp->f_cred->fsuid <= info->uid_max) ^
1769                     !(info->invert & XT_QTAGUID_UID)) {
1770                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1771                                  par->hooknum);
1772                         res = false;
1773                         goto put_sock_ret_res;
1774                 }
1775         if (info->match & XT_QTAGUID_GID)
1776                 if ((filp->f_cred->fsgid >= info->gid_min &&
1777                                 filp->f_cred->fsgid <= info->gid_max) ^
1778                         !(info->invert & XT_QTAGUID_GID)) {
1779                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1780                                 par->hooknum);
1781                         res = false;
1782                         goto put_sock_ret_res;
1783                 }
1784
1785         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1786         res = true;
1787
1788 put_sock_ret_res:
1789         if (got_sock)
1790                 xt_socket_put_sk(sk);
1791 ret_res:
1792         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1793         return res;
1794 }
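
/*
 * Illustrative, hypothetical user-space usage, assuming the Android
 * libxt_qtaguid iptables extension exposes owner-style options that map onto
 * the XT_QTAGUID_UID/GID/SOCKET flags tested above (the exact option names
 * come from that user-space library, not from this module):
 *
 *   iptables -A OUTPUT -m qtaguid --uid-owner 10005 -j REJECT
 */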
1795
1796 #ifdef DDEBUG
1797 /* This function is not in xt_qtaguid_print.c because of lock visibility */
1798 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1799 {
1800         va_list args;
1801         char *fmt_buff;
1802         char *buff;
1803
1804         if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1805                 return;
1806
1807         fmt_buff = kasprintf(GFP_ATOMIC,
1808                              "qtaguid: %s(): %s {\n", __func__, fmt);
1809         BUG_ON(!fmt_buff);
1810         va_start(args, fmt);
1811         buff = kvasprintf(GFP_ATOMIC,
1812                           fmt_buff, args);
1813         BUG_ON(!buff);
1814         pr_debug("%s", buff);
1815         kfree(fmt_buff);
1816         kfree(buff);
1817         va_end(args);
1818
1819         spin_lock_bh(&sock_tag_list_lock);
1820         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1821         spin_unlock_bh(&sock_tag_list_lock);
1822
1823         spin_lock_bh(&sock_tag_list_lock);
1824         spin_lock_bh(&uid_tag_data_tree_lock);
1825         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1826         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1827         spin_unlock_bh(&uid_tag_data_tree_lock);
1828         spin_unlock_bh(&sock_tag_list_lock);
1829
1830         spin_lock_bh(&iface_stat_list_lock);
1831         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1832         spin_unlock_bh(&iface_stat_list_lock);
1833
1834         pr_debug("qtaguid: %s(): }\n", __func__);
1835 }
1836 #else
1837 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1838 #endif
1839
1840 struct proc_ctrl_print_info {
1841         struct sock *sk; /* socket found by reading to sk_pos */
1842         loff_t sk_pos;
1843 };
1844
1845 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1846 {
1847         struct proc_ctrl_print_info *pcpi = m->private;
1848         struct sock_tag *sock_tag_entry = v;
1849         struct rb_node *node;
1850
1851         (*pos)++;
1852
1853         if (!v || v == SEQ_START_TOKEN)
1854                 return NULL;
1855
1856         node = rb_next(&sock_tag_entry->sock_node);
1857         if (!node) {
1858                 pcpi->sk = NULL;
1859                 sock_tag_entry = SEQ_START_TOKEN;
1860         } else {
1861                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1862                 pcpi->sk = sock_tag_entry->sk;
1863         }
1864         pcpi->sk_pos = *pos;
1865         return sock_tag_entry;
1866 }
1867
1868 static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
1869 {
1870         struct proc_ctrl_print_info *pcpi = m->private;
1871         struct sock_tag *sock_tag_entry;
1872         struct rb_node *node;
1873
1874         spin_lock_bh(&sock_tag_list_lock);
1875
1876         if (unlikely(module_passive))
1877                 return NULL;
1878
1879         if (*pos == 0) {
1880                 pcpi->sk_pos = 0;
1881                 node = rb_first(&sock_tag_tree);
1882                 if (!node) {
1883                         pcpi->sk = NULL;
1884                         return SEQ_START_TOKEN;
1885                 }
1886                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1887                 pcpi->sk = sock_tag_entry->sk;
1888         } else {
1889                 sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
1890                                                 NULL) ?: SEQ_START_TOKEN;
1891                 if (*pos != pcpi->sk_pos) {
1892                         /* seq_read skipped a next call */
1893                         *pos = pcpi->sk_pos;
1894                         return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
1895                 }
1896         }
1897         return sock_tag_entry;
1898 }
1899
1900 static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
1901 {
1902         spin_unlock_bh(&sock_tag_list_lock);
1903 }
1904
1905 /*
1906  * Procfs reader to get all active socket tags using style "1)" as described in
1907  * fs/proc/generic.c
1908  */
1909 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1910 {
1911         struct sock_tag *sock_tag_entry = v;
1912         uid_t uid;
1913         long f_count;
1914
1915         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1916                  current->pid, current->tgid, current_fsuid());
1917
1918         if (sock_tag_entry != SEQ_START_TOKEN) {
1919                 uid = get_uid_from_tag(sock_tag_entry->tag);
1920                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1921                          "pid=%u\n",
1922                          sock_tag_entry->sk,
1923                          sock_tag_entry->tag,
1924                          uid,
1925                          sock_tag_entry->pid
1926                         );
1927                 f_count = atomic_long_read(
1928                         &sock_tag_entry->socket->file->f_count);
1929                 seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
1930                            "f_count=%lu\n",
1931                            sock_tag_entry->sk,
1932                            sock_tag_entry->tag, uid,
1933                            sock_tag_entry->pid, f_count);
1934         } else {
1935                 seq_printf(m, "events: sockets_tagged=%llu "
1936                            "sockets_untagged=%llu "
1937                            "counter_set_changes=%llu "
1938                            "delete_cmds=%llu "
1939                            "iface_events=%llu "
1940                            "match_calls=%llu "
1941                            "match_calls_prepost=%llu "
1942                            "match_found_sk=%llu "
1943                            "match_found_sk_in_ct=%llu "
1944                            "match_found_no_sk_in_ct=%llu "
1945                            "match_no_sk=%llu "
1946                            "match_no_sk_file=%llu\n",
1947                            atomic64_read(&qtu_events.sockets_tagged),
1948                            atomic64_read(&qtu_events.sockets_untagged),
1949                            atomic64_read(&qtu_events.counter_set_changes),
1950                            atomic64_read(&qtu_events.delete_cmds),
1951                            atomic64_read(&qtu_events.iface_events),
1952                            atomic64_read(&qtu_events.match_calls),
1953                            atomic64_read(&qtu_events.match_calls_prepost),
1954                            atomic64_read(&qtu_events.match_found_sk),
1955                            atomic64_read(&qtu_events.match_found_sk_in_ct),
1956                            atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1957                            atomic64_read(&qtu_events.match_no_sk),
1958                            atomic64_read(&qtu_events.match_no_sk_file));
1959
1960                 /* Count the following as part of the last item_index */
1961                 prdebug_full_state(0, "proc ctrl");
1962         }
1963
1964         return 0;
1965 }
1966
1967 /*
1968  * Delete socket tags and stat tags associated with a given
1969  * accounting tag and uid.
1970  */
1971 static int ctrl_cmd_delete(const char *input)
1972 {
1973         char cmd;
1974         uid_t uid;
1975         uid_t entry_uid;
1976         tag_t acct_tag;
1977         tag_t tag;
1978         int res, argc;
1979         struct iface_stat *iface_entry;
1980         struct rb_node *node;
1981         struct sock_tag *st_entry;
1982         struct rb_root st_to_free_tree = RB_ROOT;
1983         struct tag_stat *ts_entry;
1984         struct tag_counter_set *tcs_entry;
1985         struct tag_ref *tr_entry;
1986         struct uid_tag_data *utd_entry;
1987
1988         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1989         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1990                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1991                  acct_tag, uid);
1992         if (argc < 2) {
1993                 res = -EINVAL;
1994                 goto err;
1995         }
1996         if (!valid_atag(acct_tag)) {
1997                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1998                 res = -EINVAL;
1999                 goto err;
2000         }
2001         if (argc < 3) {
2002                 uid = current_fsuid();
2003         } else if (!can_impersonate_uid(uid)) {
2004                 pr_info("qtaguid: ctrl_delete(%s): "
2005                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2006                         input, current->pid, current->tgid, current_fsuid());
2007                 res = -EPERM;
2008                 goto err;
2009         }
2010
2011         tag = combine_atag_with_uid(acct_tag, uid);
2012         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2013                  "looking for tag=0x%llx (uid=%u)\n",
2014                  input, tag, uid);
2015
2016         /* Delete socket tags */
2017         spin_lock_bh(&sock_tag_list_lock);
2018         node = rb_first(&sock_tag_tree);
2019         while (node) {
2020                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2021                 entry_uid = get_uid_from_tag(st_entry->tag);
2022                 node = rb_next(node);
2023                 if (entry_uid != uid)
2024                         continue;
2025
2026                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2027                          input, st_entry->tag, entry_uid);
2028
2029                 if (!acct_tag || st_entry->tag == tag) {
2030                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2031                         /* Can't sockfd_put() within spinlock, do it later. */
2032                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2033                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2034                         BUG_ON(tr_entry->num_sock_tags <= 0);
2035                         tr_entry->num_sock_tags--;
2036                         /*
2037                          * TODO: remove if, and start failing.
2038                          * This is a hack: in some places we use
2039                          * "if (IS_ERR_OR_NULL(pqd_entry))" to tolerate apps
2040                          * that never opened /dev/xt_qtaguid, hence the
2041                          * list linkage check below.
2042                          */
2043                         if (st_entry->list.next && st_entry->list.prev)
2044                                 list_del(&st_entry->list);
2045                 }
2046         }
2047         spin_unlock_bh(&sock_tag_list_lock);
2048
2049         sock_tag_tree_erase(&st_to_free_tree);
2050
2051         /* Delete tag counter-sets */
2052         spin_lock_bh(&tag_counter_set_list_lock);
2053         /* Counter sets are only on the uid tag, not full tag */
2054         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2055         if (tcs_entry) {
2056                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2057                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2058                          input,
2059                          tcs_entry->tn.tag,
2060                          get_uid_from_tag(tcs_entry->tn.tag),
2061                          tcs_entry->active_set);
2062                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2063                 kfree(tcs_entry);
2064         }
2065         spin_unlock_bh(&tag_counter_set_list_lock);
2066
2067         /*
2068          * If acct_tag is 0, then all entries belonging to uid are
2069          * erased.
2070          */
2071         spin_lock_bh(&iface_stat_list_lock);
2072         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2073                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2074                 node = rb_first(&iface_entry->tag_stat_tree);
2075                 while (node) {
2076                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2077                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2078                         node = rb_next(node);
2079
2080                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2081                                  "ts tag=0x%llx (uid=%u)\n",
2082                                  input, ts_entry->tn.tag, entry_uid);
2083
2084                         if (entry_uid != uid)
2085                                 continue;
2086                         if (!acct_tag || ts_entry->tn.tag == tag) {
2087                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2088                                          "erase ts: %s 0x%llx %u\n",
2089                                          input, iface_entry->ifname,
2090                                          get_atag_from_tag(ts_entry->tn.tag),
2091                                          entry_uid);
2092                                 rb_erase(&ts_entry->tn.node,
2093                                          &iface_entry->tag_stat_tree);
2094                                 kfree(ts_entry);
2095                         }
2096                 }
2097                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2098         }
2099         spin_unlock_bh(&iface_stat_list_lock);
2100
2101         /* Cleanup the uid_tag_data */
2102         spin_lock_bh(&uid_tag_data_tree_lock);
2103         node = rb_first(&uid_tag_data_tree);
2104         while (node) {
2105                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2106                 entry_uid = utd_entry->uid;
2107                 node = rb_next(node);
2108
2109                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2110                          "utd uid=%u\n",
2111                          input, entry_uid);
2112
2113                 if (entry_uid != uid)
2114                         continue;
2115                 /*
2116                  * Go over the tag_refs and free those that have no
2117                  * sock_tags using them.
2118                  */
2119                 put_tag_ref_tree(tag, utd_entry);
2120                 put_utd_entry(utd_entry);
2121         }
2122         spin_unlock_bh(&uid_tag_data_tree_lock);
2123
2124         atomic64_inc(&qtu_events.delete_cmds);
2125         res = 0;
2126
2127 err:
2128         return res;
2129 }
2130
2131 static int ctrl_cmd_counter_set(const char *input)
2132 {
2133         char cmd;
2134         uid_t uid = 0;
2135         tag_t tag;
2136         int res, argc;
2137         struct tag_counter_set *tcs;
2138         int counter_set;
2139
2140         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2141         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2142                  "set=%d uid=%u\n", input, argc, cmd,
2143                  counter_set, uid);
2144         if (argc != 3) {
2145                 res = -EINVAL;
2146                 goto err;
2147         }
2148         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2149                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2150                         input);
2151                 res = -EINVAL;
2152                 goto err;
2153         }
2154         if (!can_manipulate_uids()) {
2155                 pr_info("qtaguid: ctrl_counterset(%s): "
2156                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2157                         input, current->pid, current->tgid, current_fsuid());
2158                 res = -EPERM;
2159                 goto err;
2160         }
2161
2162         tag = make_tag_from_uid(uid);
2163         spin_lock_bh(&tag_counter_set_list_lock);
2164         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2165         if (!tcs) {
2166                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2167                 if (!tcs) {
2168                         spin_unlock_bh(&tag_counter_set_list_lock);
2169                         pr_err("qtaguid: ctrl_counterset(%s): "
2170                                "failed to alloc counter set\n",
2171                                input);
2172                         res = -ENOMEM;
2173                         goto err;
2174                 }
2175                 tcs->tn.tag = tag;
2176                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2177                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2178                          "(uid=%u) set=%d\n",
2179                          input, tag, get_uid_from_tag(tag), counter_set);
2180         }
2181         tcs->active_set = counter_set;
2182         spin_unlock_bh(&tag_counter_set_list_lock);
2183         atomic64_inc(&qtu_events.counter_set_changes);
2184         res = 0;
2185
2186 err:
2187         return res;
2188 }
2189
2190 static int ctrl_cmd_tag(const char *input)
2191 {
2192         char cmd;
2193         int sock_fd = 0;
2194         uid_t uid = 0;
2195         tag_t acct_tag = make_atag_from_value(0);
2196         tag_t full_tag;
2197         struct socket *el_socket;
2198         int res, argc;
2199         struct sock_tag *sock_tag_entry;
2200         struct tag_ref *tag_ref_entry;
2201         struct uid_tag_data *uid_tag_data_entry;
2202         struct proc_qtu_data *pqd_entry;
2203
2204         /* Unassigned args will get defaulted later. */
2205         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2206         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2207                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2208                  acct_tag, uid);
2209         if (argc < 2) {
2210                 res = -EINVAL;
2211                 goto err;
2212         }
2213         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2214         if (!el_socket) {
2215                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2216                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2217                         input, sock_fd, res, current->pid, current->tgid,
2218                         current_fsuid());
2219                 goto err;
2220         }
2221         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2222                  input, atomic_long_read(&el_socket->file->f_count),
2223                  el_socket->sk);
2224         if (argc < 3) {
2225                 acct_tag = make_atag_from_value(0);
2226         } else if (!valid_atag(acct_tag)) {
2227                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2228                 res = -EINVAL;
2229                 goto err_put;
2230         }
2231         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2232                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2233                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2234                  input, current->pid, current->tgid, current_uid(),
2235                  current_euid(), current_fsuid(),
2236                  xt_qtaguid_ctrl_file->gid,
2237                  in_group_p(xt_qtaguid_ctrl_file->gid),
2238                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2239         if (argc < 4) {
2240                 uid = current_fsuid();
2241         } else if (!can_impersonate_uid(uid)) {
2242                 pr_info("qtaguid: ctrl_tag(%s): "
2243                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2244                         input, current->pid, current->tgid, current_fsuid());
2245                 res = -EPERM;
2246                 goto err_put;
2247         }
2248         full_tag = combine_atag_with_uid(acct_tag, uid);
2249
2250         spin_lock_bh(&sock_tag_list_lock);
2251         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2252         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2253         if (IS_ERR(tag_ref_entry)) {
2254                 res = PTR_ERR(tag_ref_entry);
2255                 spin_unlock_bh(&sock_tag_list_lock);
2256                 goto err_put;
2257         }
2258         tag_ref_entry->num_sock_tags++;
2259         if (sock_tag_entry) {
2260                 struct tag_ref *prev_tag_ref_entry;
2261
2262                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2263                          "st@%p ...->f_count=%ld\n",
2264                          input, el_socket->sk, sock_tag_entry,
2265                          atomic_long_read(&el_socket->file->f_count));
2266                 /*
2267                  * This is a re-tagging, so release the sock_fd that was
2268                  * locked at the time of the 1st tagging.
2269                  * There is still the ref from this call's sockfd_lookup() so
2270                  * it can be done within the spinlock.
2271                  */
2272                 sockfd_put(sock_tag_entry->socket);
2273                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2274                                                     &uid_tag_data_entry);
2275                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2276                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2277                 prev_tag_ref_entry->num_sock_tags--;
2278                 sock_tag_entry->tag = full_tag;
2279         } else {
2280                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2281                          input, el_socket->sk);
2282                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2283                                          GFP_ATOMIC);
2284                 if (!sock_tag_entry) {
2285                         pr_err("qtaguid: ctrl_tag(%s): "
2286                                "socket tag alloc failed\n",
2287                                input);
2288                         spin_unlock_bh(&sock_tag_list_lock);
2289                         res = -ENOMEM;
2290                         goto err_tag_unref_put;
2291                 }
2292                 sock_tag_entry->sk = el_socket->sk;
2293                 sock_tag_entry->socket = el_socket;
2294                 sock_tag_entry->pid = current->tgid;
2295                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2296                                                             uid);
2297                 spin_lock_bh(&uid_tag_data_tree_lock);
2298                 pqd_entry = proc_qtu_data_tree_search(
2299                         &proc_qtu_data_tree, current->tgid);
2300                 /*
2301                  * TODO: remove if, and start failing.
2302                  * For now we just want to catch user-space code that is
2303                  * not opening /dev/xt_qtaguid.
2304                  */
2305                 if (IS_ERR_OR_NULL(pqd_entry))
2306                         pr_warn_once(
2307                                 "qtaguid: %s(): "
2308                                 "User space forgot to open /dev/xt_qtaguid? "
2309                                 "pid=%u tgid=%u uid=%u\n", __func__,
2310                                 current->pid, current->tgid,
2311                                 current_fsuid());
2312                 else
2313                         list_add(&sock_tag_entry->list,
2314                                  &pqd_entry->sock_tag_list);
2315                 spin_unlock_bh(&uid_tag_data_tree_lock);
2316
2317                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2318                 atomic64_inc(&qtu_events.sockets_tagged);
2319         }
2320         spin_unlock_bh(&sock_tag_list_lock);
2321         /* We keep the ref to the socket (file) until it is untagged */
2322         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2323                  input, sock_tag_entry,
2324                  atomic_long_read(&el_socket->file->f_count));
2325         return 0;
2326
2327 err_tag_unref_put:
2328         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2329         tag_ref_entry->num_sock_tags--;
2330         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2331 err_put:
2332         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2333                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2334         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2335         sockfd_put(el_socket);
2336         return res;
2337
2338 err:
2339         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2340         return res;
2341 }
2342
2343 static int ctrl_cmd_untag(const char *input)
2344 {
2345         char cmd;
2346         int sock_fd = 0;
2347         struct socket *el_socket;
2348         int res, argc;
2349         struct sock_tag *sock_tag_entry;
2350         struct tag_ref *tag_ref_entry;
2351         struct uid_tag_data *utd_entry;
2352         struct proc_qtu_data *pqd_entry;
2353
2354         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2355         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2356                  input, argc, cmd, sock_fd);
2357         if (argc < 2) {
2358                 res = -EINVAL;
2359                 goto err;
2360         }
2361         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2362         if (!el_socket) {
2363                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2364                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2365                         input, sock_fd, res, current->pid, current->tgid,
2366                         current_fsuid());
2367                 goto err;
2368         }
2369         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2370                  input, atomic_long_read(&el_socket->file->f_count),
2371                  el_socket->sk);
2372         spin_lock_bh(&sock_tag_list_lock);
2373         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2374         if (!sock_tag_entry) {
2375                 spin_unlock_bh(&sock_tag_list_lock);
2376                 res = -EINVAL;
2377                 goto err_put;
2378         }
2379         /*
2380          * The socket already belongs to the current process
2381          * so it can do whatever it wants to it.
2382          */
2383         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2384
2385         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2386         BUG_ON(!tag_ref_entry);
2387         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2388         spin_lock_bh(&uid_tag_data_tree_lock);
2389         pqd_entry = proc_qtu_data_tree_search(
2390                 &proc_qtu_data_tree, current->tgid);
2391         /*
2392          * TODO: remove if, and start failing.
2393          * For now we just want to catch user-space code that is
2394          * not opening /dev/xt_qtaguid.
2395          */
2396         if (IS_ERR_OR_NULL(pqd_entry))
2397                 pr_warn_once("qtaguid: %s(): "
2398                              "User space forgot to open /dev/xt_qtaguid? "
2399                              "pid=%u tgid=%u uid=%u\n", __func__,
2400                              current->pid, current->tgid, current_fsuid());
2401         else
2402                 list_del(&sock_tag_entry->list);
2403         spin_unlock_bh(&uid_tag_data_tree_lock);
2404         /*
2405          * We don't free tag_ref from the utd_entry here,
2406          * only during a cmd_delete().
2407          */
2408         tag_ref_entry->num_sock_tags--;
2409         spin_unlock_bh(&sock_tag_list_lock);
2410         /*
2411          * Release the sock_fd that was grabbed at tag time,
2412          * and once more for the sockfd_lookup() here.
2413          */
2414         sockfd_put(sock_tag_entry->socket);
2415         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2416                  input, sock_tag_entry,
2417                  atomic_long_read(&el_socket->file->f_count) - 1);
2418         sockfd_put(el_socket);
2419
2420         kfree(sock_tag_entry);
2421         atomic64_inc(&qtu_events.sockets_untagged);
2422
2423         return 0;
2424
2425 err_put:
2426         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2427                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2428         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2429         sockfd_put(el_socket);
2430         return res;
2431
2432 err:
2433         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2434         return res;
2435 }
2436
2437 static int qtaguid_ctrl_parse(const char *input, int count)
2438 {
2439         char cmd;
2440         int res;
2441
2442         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2443                  input, current->pid, current->tgid, current_fsuid());
2444
2445         cmd = input[0];
2446         /* Dispatch the command; each handler parses its own params. */
2447         switch (cmd) {
2448         case 'd':
2449                 res = ctrl_cmd_delete(input);
2450                 break;
2451
2452         case 's':
2453                 res = ctrl_cmd_counter_set(input);
2454                 break;
2455
2456         case 't':
2457                 res = ctrl_cmd_tag(input);
2458                 break;
2459
2460         case 'u':
2461                 res = ctrl_cmd_untag(input);
2462                 break;
2463
2464         default:
2465                 res = -EINVAL;
2466                 goto err;
2467         }
2468         if (!res)
2469                 res = count;
2470 err:
2471         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2472         return res;
2473 }
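
/*
 * Command formats accepted above, taken from the sscanf() calls in the
 * ctrl_cmd_*() handlers (numeric fields are plain decimal; <acct_tag> is the
 * 64-bit tag value with the accounting tag in its upper 32 bits):
 *
 *   "d <acct_tag> [<uid>]"               delete tags/stats (uid defaults to
 *                                        the caller's fsuid)
 *   "s <counter_set> <uid>"              select the active counter set
 *   "t <sock_fd> [<acct_tag> [<uid>]]"   tag a socket
 *   "u <sock_fd>"                        untag a socket
 */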
2474
2475 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2476 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2477                                    size_t count, loff_t *offp)
2478 {
2479         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2480
2481         if (unlikely(module_passive))
2482                 return count;
2483
2484         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2485                 return -EINVAL;
2486
2487         if (copy_from_user(input_buf, buffer, count))
2488                 return -EFAULT;
2489
2490         input_buf[count] = '\0';
2491         return qtaguid_ctrl_parse(input_buf, count);
2492 }
2493
2494 struct proc_print_info {
2495         struct iface_stat *iface_entry;
2496         int item_index;
2497         tag_t tag; /* tag found by reading to tag_pos */
2498         off_t tag_pos;
2499         int tag_item_index;
2500 };
2501
2502 static void pp_stats_header(struct seq_file *m)
2503 {
2504         seq_puts(m,
2505                  "idx iface acct_tag_hex uid_tag_int cnt_set "
2506                  "rx_bytes rx_packets "
2507                  "tx_bytes tx_packets "
2508                  "rx_tcp_bytes rx_tcp_packets "
2509                  "rx_udp_bytes rx_udp_packets "
2510                  "rx_other_bytes rx_other_packets "
2511                  "tx_tcp_bytes tx_tcp_packets "
2512                  "tx_udp_bytes tx_udp_packets "
2513                  "tx_other_bytes tx_other_packets\n");
2514 }
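
/*
 * Each line emitted by pp_stats_line() below follows the header
 * column-for-column, e.g. (values purely illustrative):
 *
 *   2 wlan0 0x0 10005 0 10240 16 2048 12 8192 10 2048 6 0 0 1024 8 1024 4 0 0
 *
 * i.e. idx, ifname, acct_tag in hex, uid, counter set, then byte/packet
 * pairs in the order listed above.
 */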
2515
2516 static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
2517                          int cnt_set)
2518 {
2519         int ret;
2520         struct data_counters *cnts;
2521         tag_t tag = ts_entry->tn.tag;
2522         uid_t stat_uid = get_uid_from_tag(tag);
2523         struct proc_print_info *ppi = m->private;
2524         /* Detailed tags are not available to everybody */
2525         if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) {
2526                 CT_DEBUG("qtaguid: stats line: "
2527                          "%s 0x%llx %u: insufficient priv "
2528                          "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2529                          ppi->iface_entry->ifname,
2530                          get_atag_from_tag(tag), stat_uid,
2531                          current->pid, current->tgid, current_fsuid(),
2532                          xt_qtaguid_stats_file->gid);
2533                 return 0;
2534         }
2535         ppi->item_index++;
2536         cnts = &ts_entry->counters;
2537         ret = seq_printf(m, "%d %s 0x%llx %u %u "
2538                 "%llu %llu "
2539                 "%llu %llu "
2540                 "%llu %llu "
2541                 "%llu %llu "
2542                 "%llu %llu "
2543                 "%llu %llu "
2544                 "%llu %llu "
2545                 "%llu %llu\n",
2546                 ppi->item_index,
2547                 ppi->iface_entry->ifname,
2548                 get_atag_from_tag(tag),
2549                 stat_uid,
2550                 cnt_set,
2551                 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2552                 dc_sum_packets(cnts, cnt_set, IFS_RX),
2553                 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2554                 dc_sum_packets(cnts, cnt_set, IFS_TX),
2555                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2556                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2557                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2558                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2559                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2560                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2561                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2562                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2563                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2564                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2565                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2566                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2567         return ret ?: 1;
2568 }
2569
2570 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2571 {
2572         int ret;
2573         int counter_set;
2574         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2575              counter_set++) {
2576                 ret = pp_stats_line(m, ts_entry, counter_set);
2577                 if (ret < 0)
2578                         return false;
2579         }
2580         return true;
2581 }
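
/*
 * pp_sets() emits one such line per counter set (0..IFS_MAX_COUNTER_SETS-1)
 * for every tag; the set that gets charged for a given uid is selected
 * through the "s <counter_set> <uid>" ctrl command handled in
 * ctrl_cmd_counter_set().
 */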
2582
2583 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2584 {
2585         struct iface_stat *iface_entry;
2586
2587         if (!ptr)
2588                 return false;
2589
2590         list_for_each_entry(iface_entry, &iface_stat_list, list)
2591                 if (iface_entry == ptr)
2592                         return true;
2593         return false;
2594 }
2595
2596 static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
2597 {
2598         spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2599         list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
2600                 spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2601                 return;
2602         }
2603         ppi->iface_entry = NULL;
2604 }
2605
2606 static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
2607 {
2608         struct proc_print_info *ppi = m->private;
2609         struct tag_stat *ts_entry;
2610         struct rb_node *node;
2611
2612         if (!v) {
2613                 pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
2614                 return NULL;
2615         }
2616
2617         (*pos)++;
2618
2619         if (!ppi->iface_entry || unlikely(module_passive))
2620                 return NULL;
2621
2622         if (v == SEQ_START_TOKEN)
2623                 node = rb_first(&ppi->iface_entry->tag_stat_tree);
2624         else
2625                 node = rb_next(&((struct tag_stat *)v)->tn.node);
2626
2627         while (!node) {
2628                 qtaguid_stats_proc_next_iface_entry(ppi);
2629                 if (!ppi->iface_entry)
2630                         return NULL;
2631                 node = rb_first(&ppi->iface_entry->tag_stat_tree);
2632         }
2633
2634         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2635         ppi->tag = ts_entry->tn.tag;
2636         ppi->tag_pos = *pos;
2637         ppi->tag_item_index = ppi->item_index;
2638         return ts_entry;
2639 }
2640
2641 static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
2642 {
2643         struct proc_print_info *ppi = m->private;
2644         struct tag_stat *ts_entry = NULL;
2645
2646         spin_lock_bh(&iface_stat_list_lock);
2647
2648         if (*pos == 0) {
2649                 ppi->item_index = 1;
2650                 ppi->tag_pos = 0;
2651                 if (list_empty(&iface_stat_list)) {
2652                         ppi->iface_entry = NULL;
2653                 } else {
2654                         ppi->iface_entry = list_first_entry(&iface_stat_list,
2655                                                             struct iface_stat,
2656                                                             list);
2657                         spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2658                 }
2659                 return SEQ_START_TOKEN;
2660         }
2661         if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
2662                 if (ppi->iface_entry) {
2663                         pr_err("qtaguid: %s(): iface_entry %p not found\n",
2664                                __func__, ppi->iface_entry);
2665                         ppi->iface_entry = NULL;
2666                 }
2667                 return NULL;
2668         }
2669
2670         spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2671
2672         if (!ppi->tag_pos) {
2673                 /* seq_read skipped first next call */
2674                 ts_entry = SEQ_START_TOKEN;
2675         } else {
2676                 ts_entry = tag_stat_tree_search(
2677                                 &ppi->iface_entry->tag_stat_tree, ppi->tag);
2678                 if (!ts_entry) {
2679                         pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
2680                                 __func__, ppi->tag);
2681                         return NULL;
2682                 }
2683         }
2684
2685         if (*pos == ppi->tag_pos) { /* normal resume */
2686                 ppi->item_index = ppi->tag_item_index;
2687         } else {
2688                 /* seq_read skipped a next call */
2689                 *pos = ppi->tag_pos;
2690                 ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
2691         }
2692
2693         return ts_entry;
2694 }
2695
2696 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2697 {
2698         struct proc_print_info *ppi = m->private;
2699         if (ppi->iface_entry)
2700                 spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2701         spin_unlock_bh(&iface_stat_list_lock);
2702 }
2703
2704 /*
2705  * Procfs reader to get all tag stats using style "1)" as described in
2706  * fs/proc/generic.c
2707  * Groups all protocols tx/rx bytes.
2708  */
2709 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2710 {
2711         struct tag_stat *ts_entry = v;
2712
2713         if (v == SEQ_START_TOKEN)
2714                 pp_stats_header(m);
2715         else
2716                 pp_sets(m, ts_entry);
2717
2718         return 0;
2719 }
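/*
 * Illustrative sketch (not part of the build): with the proc entries
 * registered below in qtaguid_proc_register(), userspace reads the
 * aggregated per-tag stats from /proc/net/xt_qtaguid/stats.  The exact
 * column layout comes from pp_stats_header()/pp_sets() and is not
 * repeated here.  A minimal, hypothetical reader:
 *
 *	FILE *f = fopen("/proc/net/xt_qtaguid/stats", "r");
 *	char line[512];
 *
 *	if (f) {
 *		while (fgets(line, sizeof(line), f))
 *			fputs(line, stdout);
 *		fclose(f);
 *	}
 */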
2720
2721 /*------------------------------------------*/
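/*
 * Misc-device ("qtu") handlers.  A process that tags sockets is expected
 * to keep a file descriptor to the QTU_DEV_NAME misc device open: open()
 * records a proc_qtu_data entry keyed by current->tgid (and takes a
 * reference on the uid_tag_data of the opening uid), while release()
 * walks that entry's sock_tag_list and drops any socket tags the process
 * left behind, so per-process tagging state is cleaned up even if the
 * process dies without untagging.
 */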
2722 static int qtudev_open(struct inode *inode, struct file *file)
2723 {
2724         struct uid_tag_data *utd_entry;
2725         struct proc_qtu_data  *pqd_entry;
2726         struct proc_qtu_data  *new_pqd_entry;
2727         int res;
2728         bool utd_entry_found;
2729
2730         if (unlikely(qtu_proc_handling_passive))
2731                 return 0;
2732
2733         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2734                  current->pid, current->tgid, current_fsuid());
2735
2736         spin_lock_bh(&uid_tag_data_tree_lock);
2737
2738         /* Look for existing uid data, or alloc one. */
2739         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2740         if (IS_ERR_OR_NULL(utd_entry)) {
2741                 res = PTR_ERR(utd_entry);
2742                 goto err_unlock;
2743         }
2744
2745         /* Look for existing PID based proc_data */
2746         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2747                                               current->tgid);
2748         if (pqd_entry) {
2749                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2750                        "%s already opened\n",
2751                        current->pid, current->tgid, current_fsuid(),
2752                        QTU_DEV_NAME);
2753                 res = -EBUSY;
2754                 goto err_unlock_free_utd;
2755         }
2756
2757         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2758         if (!new_pqd_entry) {
2759                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2760                        "proc data alloc failed\n",
2761                        current->pid, current->tgid, current_fsuid());
2762                 res = -ENOMEM;
2763                 goto err_unlock_free_utd;
2764         }
2765         new_pqd_entry->pid = current->tgid;
2766         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2767         new_pqd_entry->parent_tag_data = utd_entry;
2768         utd_entry->num_pqd++;
2769
2770         proc_qtu_data_tree_insert(new_pqd_entry,
2771                                   &proc_qtu_data_tree);
2772
2773         spin_unlock_bh(&uid_tag_data_tree_lock);
2774         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2775                  current_fsuid(), new_pqd_entry);
2776         file->private_data = new_pqd_entry;
2777         return 0;
2778
2779 err_unlock_free_utd:
2780         if (!utd_entry_found) {
2781                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2782                 kfree(utd_entry);
2783         }
2784 err_unlock:
2785         spin_unlock_bh(&uid_tag_data_tree_lock);
2786         return res;
2787 }
2788
2789 static int qtudev_release(struct inode *inode, struct file *file)
2790 {
2791         struct proc_qtu_data  *pqd_entry = file->private_data;
2792         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2793         struct sock_tag *st_entry;
2794         struct rb_root st_to_free_tree = RB_ROOT;
2795         struct list_head *entry, *next;
2796         struct tag_ref *tr;
2797
2798         if (unlikely(qtu_proc_handling_passive))
2799                 return 0;
2800
2801         /*
2802          * Do not trust the current->pid, it might just be a kworker cleaning
2803          * up after a dead proc.
2804          */
2805         DR_DEBUG("qtaguid: qtudev_release(): "
2806                  "pid=%u tgid=%u uid=%u "
2807                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2808                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2809                  pqd_entry, pqd_entry->pid, utd_entry,
2810                  utd_entry->num_active_tags);
2811
2812         spin_lock_bh(&sock_tag_list_lock);
2813         spin_lock_bh(&uid_tag_data_tree_lock);
2814
2815         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2816                 st_entry = list_entry(entry, struct sock_tag, list);
2817                 DR_DEBUG("qtaguid: %s(): "
2818                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2819                          __func__,
2820                          st_entry, st_entry->sk,
2821                          current->pid, current->tgid,
2822                          pqd_entry->parent_tag_data->uid);
2823
2824                 utd_entry = uid_tag_data_tree_search(
2825                         &uid_tag_data_tree,
2826                         get_uid_from_tag(st_entry->tag));
2827                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2828                 DR_DEBUG("qtaguid: %s(): "
2829                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2830                          st_entry->tag, utd_entry);
2831                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2832                                          st_entry->tag);
2833                 BUG_ON(!tr);
2834                 BUG_ON(tr->num_sock_tags <= 0);
2835                 tr->num_sock_tags--;
2836                 free_tag_ref_from_utd_entry(tr, utd_entry);
2837
2838                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2839                 list_del(&st_entry->list);
2840                 /* Can't sockfd_put() within spinlock, do it later. */
2841                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2842
2843                 /*
2844                  * Try to free the utd_entry if no other proc_qtu_data is
2845                  * using it (num_pqd is 0) and it doesn't have active tags
2846                  * (num_active_tags is 0).
2847                  */
2848                 put_utd_entry(utd_entry);
2849         }
2850
2851         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2852         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2853         pqd_entry->parent_tag_data->num_pqd--;
2854         put_utd_entry(pqd_entry->parent_tag_data);
2855         kfree(pqd_entry);
2856         file->private_data = NULL;
2857
2858         spin_unlock_bh(&uid_tag_data_tree_lock);
2859         spin_unlock_bh(&sock_tag_list_lock);
2860
2861
2862         sock_tag_tree_erase(&st_to_free_tree);
2863
2864         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2865                            current->pid, current->tgid);
2866         return 0;
2867 }
2868
2869 /*------------------------------------------*/
2870 static const struct file_operations qtudev_fops = {
2871         .owner = THIS_MODULE,
2872         .open = qtudev_open,
2873         .release = qtudev_release,
2874 };
2875
2876 static struct miscdevice qtu_device = {
2877         .minor = MISC_DYNAMIC_MINOR,
2878         .name = QTU_DEV_NAME,
2879         .fops = &qtudev_fops,
2880         /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2881 };
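/*
 * Illustrative sketch (not part of the build): the misc device is
 * expected to appear as /dev/<QTU_DEV_NAME> under a conventional
 * udev/ueventd setup.  A tagging process would typically open it once and
 * hold the fd for its lifetime so that qtudev_release() runs its cleanup
 * when the process goes away (the "xt_qtaguid" name below assumes
 * QTU_DEV_NAME expands to that string):
 *
 *	int qtu_fd = open("/dev/xt_qtaguid", O_RDONLY);
 *	// ... tag sockets while qtu_fd stays open ...
 *	close(qtu_fd);	// last close triggers qtudev_release()
 */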
2882
2883 static const struct seq_operations proc_qtaguid_ctrl_seqops = {
2884         .start = qtaguid_ctrl_proc_start,
2885         .next = qtaguid_ctrl_proc_next,
2886         .stop = qtaguid_ctrl_proc_stop,
2887         .show = qtaguid_ctrl_proc_show,
2888 };
2889
2890 static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
2891 {
2892         return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
2893                                 sizeof(struct proc_ctrl_print_info));
2894 }
2895
2896 static const struct file_operations proc_qtaguid_ctrl_fops = {
2897         .open           = proc_qtaguid_ctrl_open,
2898         .read           = seq_read,
2899         .write          = qtaguid_ctrl_proc_write,
2900         .llseek         = seq_lseek,
2901         .release        = seq_release_private,
2902 };
2903
2904 static const struct seq_operations proc_qtaguid_stats_seqops = {
2905         .start = qtaguid_stats_proc_start,
2906         .next = qtaguid_stats_proc_next,
2907         .stop = qtaguid_stats_proc_stop,
2908         .show = qtaguid_stats_proc_show,
2909 };
2910
2911 static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
2912 {
2913         return seq_open_private(file, &proc_qtaguid_stats_seqops,
2914                                 sizeof(struct proc_print_info));
2915 }
2916
2917 static const struct file_operations proc_qtaguid_stats_fops = {
2918         .open           = proc_qtaguid_stats_open,
2919         .read           = seq_read,
2920         .llseek         = seq_lseek,
2921         .release        = seq_release_private,
2922 };
2923
2924 /*------------------------------------------*/
2925 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2926 {
2927         int ret;
2928         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2929         if (!*res_procdir) {
2930                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2931                 ret = -ENOMEM;
2932                 goto no_dir;
2933         }
2934
2935         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2936                                                 *res_procdir,
2937                                                 &proc_qtaguid_ctrl_fops,
2938                                                 NULL);
2939         if (!xt_qtaguid_ctrl_file) {
2940                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl file\n");
2942                 ret = -ENOMEM;
2943                 goto no_ctrl_entry;
2944         }
2945
2946         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2947                                                  *res_procdir,
2948                                                  &proc_qtaguid_stats_fops,
2949                                                  NULL);
2950         if (!xt_qtaguid_stats_file) {
2951                 pr_err("qtaguid: failed to create xt_qtaguid/stats file\n");
2953                 ret = -ENOMEM;
2954                 goto no_stats_entry;
2955         }
2956         /*
2957          * TODO: add support for counter hacking
2958          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2959          */
2960         return 0;
2961
2962 no_stats_entry:
2963         remove_proc_entry("ctrl", *res_procdir);
2964 no_ctrl_entry:
2965         remove_proc_entry(module_procdirname, init_net.proc_net);
2966 no_dir:
2967         return ret;
2968 }
2969
2970 static struct xt_match qtaguid_mt_reg __read_mostly = {
2971         /*
2972          * This module masquerades as the "owner" module so that iptables
2973          * tools can deal with it.
2974          */
2975         .name       = "owner",
2976         .revision   = 1,
2977         .family     = NFPROTO_UNSPEC,
2978         .match      = qtaguid_mt,
2979         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2980         .me         = THIS_MODULE,
2981 };
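/*
 * Illustrative sketch (not part of the build): since the match registers
 * as "owner" revision 1, stock iptables can already address it with the
 * usual owner-match syntax, e.g. a hypothetical rule such as:
 *
 *	iptables -A OUTPUT -m owner --uid-owner 1000 -j ACCEPT
 *
 * The qtaguid-specific options travel in struct xt_qtaguid_match_info
 * (cf. .matchsize above).
 */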
2982
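/*
 * Module init: iface_stat_init() takes the proc directory created by
 * qtaguid_proc_register(), so registration order matters.  A failure
 * part-way through is not rolled back here; since the module cannot be
 * unloaded (see the TODO below), init is expected to run exactly once at
 * boot.
 */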
2983 static int __init qtaguid_mt_init(void)
2984 {
2985         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2986             || iface_stat_init(xt_qtaguid_procdir)
2987             || xt_register_match(&qtaguid_mt_reg)
2988             || misc_register(&qtu_device))
2989                 return -1;
2990         return 0;
2991 }
2992
2993 /*
2994  * TODO: allow unloading of the module.
2995  * For now stats are permanent.
2996  * Kconfig forces 'y/n' and never 'm'.
2997  */
2998
2999 module_init(qtaguid_mt_init);
3000 MODULE_AUTHOR("jpa <jpa@google.com>");
3001 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3002 MODULE_LICENSE("GPL");
3003 MODULE_ALIAS("ipt_owner");
3004 MODULE_ALIAS("ip6t_owner");
3005 MODULE_ALIAS("ipt_qtaguid");
3006 MODULE_ALIAS("ip6t_qtaguid");