2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
29 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
30 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #include <linux/netfilter/xt_socket.h>
34 #include "xt_qtaguid_internal.h"
35 #include "xt_qtaguid_print.h"
38 * We only use the xt_socket funcs within a similar context to avoid unexpected
41 #define XT_SOCKET_SUPPORTED_HOOKS \
42 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
45 static const char *module_procdirname = "xt_qtaguid";
46 static struct proc_dir_entry *xt_qtaguid_procdir;
48 static unsigned int proc_iface_perms = S_IRUGO;
49 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
51 static struct proc_dir_entry *xt_qtaguid_stats_file;
52 static unsigned int proc_stats_perms = S_IRUGO;
53 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
55 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
56 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
57 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
59 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
61 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
63 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
64 #include <linux/android_aid.h>
65 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
66 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
68 /* 0 means, don't limit anybody */
69 static gid_t proc_stats_readall_gid;
70 static gid_t proc_ctrl_write_gid;
72 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
74 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
78 * Limit the number of active tags (via socket tags) for a given UID.
79 * Multiple processes could share the UID.
81 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
82 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
85 * After the kernel has initialized this module, it is still possible
87 * Setting passive to Y:
88 * - the iface stats handling will not act on notifications.
89 * - iptables matches will never match.
90 * - ctrl commands silently succeed.
91 * - stats are always empty.
92 * This is mostly useful when a bug is suspected.
94 static bool module_passive;
95 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
98 * Control how qtaguid data is tracked per proc/uid.
99 * Setting tag_tracking_passive to Y:
100 * - don't create proc specific structs to track tags
101 * - don't check that active tag stats exceed some limits.
102 * - don't clean up socket tags on process exits.
103 * This is mostly useful when a bug is suspected.
105 static bool qtu_proc_handling_passive;
106 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
109 #define QTU_DEV_NAME "xt_qtaguid"
111 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
112 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname = "iface_stat";
116 static struct proc_dir_entry *iface_stat_procdir;
118 * The iface_stat_all* will go away once userspace gets used to the new fields
119 * that have a format line.
121 static const char *iface_stat_all_procfilename = "iface_stat_all";
122 static struct proc_dir_entry *iface_stat_all_procfile;
123 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
124 static struct proc_dir_entry *iface_stat_fmt_procfile;
130 * iface_stat_list_lock
133 * uid_tag_data_tree_lock
134 * tag_counter_set_list_lock
135 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
138 * Call tree with all lock holders as of 2012-04-27:
140 * iface_stat_fmt_proc_read()
141 * iface_stat_list_lock
142 * (struct iface_stat)
144 * qtaguid_ctrl_proc_read()
147 * (struct proc_qtu_data->sock_tag_list)
148 * prdebug_full_state()
151 * uid_tag_data_tree_lock
152 * (uid_tag_data_tree)
153 * (proc_qtu_data_tree)
154 * iface_stat_list_lock
156 * qtaguid_stats_proc_read()
157 * iface_stat_list_lock
158 * struct iface_stat->tag_stat_list_lock
161 * uid_tag_data_tree_lock
164 * sock_tag_data_list_lock
165 * uid_tag_data_tree_lock
166 * prdebug_full_state()
168 * uid_tag_data_tree_lock
169 * iface_stat_list_lock
171 * iface_netdev_event_handler()
172 * iface_stat_create()
173 * iface_stat_list_lock
174 * iface_stat_update()
175 * iface_stat_list_lock
177 * iface_inetaddr_event_handler()
178 * iface_stat_create()
179 * iface_stat_list_lock
180 * iface_stat_update()
181 * iface_stat_list_lock
183 * iface_inet6addr_event_handler()
184 * iface_stat_create_ipv6()
185 * iface_stat_list_lock
186 * iface_stat_update()
187 * iface_stat_list_lock
191 * if_tag_stat_update()
194 * struct iface_stat->tag_stat_list_lock
196 * get_active_counter_set()
197 * tag_counter_set_list_lock
199 * get_active_counter_set()
200 * tag_counter_set_list_lock
203 * qtaguid_ctrl_parse()
206 * tag_counter_set_list_lock
207 * iface_stat_list_lock
208 * struct iface_stat->tag_stat_list_lock
209 * uid_tag_data_tree_lock
210 * ctrl_cmd_counter_set()
211 * tag_counter_set_list_lock
216 * uid_tag_data_tree_lock
217 * (uid_tag_data_tree)
218 * uid_tag_data_tree_lock
219 * (proc_qtu_data_tree)
222 * uid_tag_data_tree_lock
225 static LIST_HEAD(iface_stat_list);
226 static DEFINE_SPINLOCK(iface_stat_list_lock);
228 static struct rb_root sock_tag_tree = RB_ROOT;
229 static DEFINE_SPINLOCK(sock_tag_list_lock);
231 static struct rb_root tag_counter_set_tree = RB_ROOT;
232 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
234 static struct rb_root uid_tag_data_tree = RB_ROOT;
235 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
237 static struct rb_root proc_qtu_data_tree = RB_ROOT;
238 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
240 static struct qtaguid_event_counts qtu_events;
241 /*----------------------------------------------*/
242 static bool can_manipulate_uids(void)
245 return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
246 || in_egroup_p(proc_ctrl_write_gid);
249 static bool can_impersonate_uid(uid_t uid)
251 return uid == current_fsuid() || can_manipulate_uids();
254 static bool can_read_other_uid_stats(uid_t uid)
257 return unlikely(!current_fsuid()) || uid == current_fsuid()
258 || unlikely(!proc_stats_readall_gid)
259 || in_egroup_p(proc_stats_readall_gid);
262 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
263 enum ifs_tx_rx direction,
264 enum ifs_proto ifs_proto,
268 counters->bpc[set][direction][ifs_proto].bytes += bytes;
269 counters->bpc[set][direction][ifs_proto].packets += packets;
272 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
274 enum ifs_tx_rx direction)
276 return counters->bpc[set][direction][IFS_TCP].bytes
277 + counters->bpc[set][direction][IFS_UDP].bytes
278 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
281 static inline uint64_t dc_sum_packets(struct data_counters *counters,
283 enum ifs_tx_rx direction)
285 return counters->bpc[set][direction][IFS_TCP].packets
286 + counters->bpc[set][direction][IFS_UDP].packets
287 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
290 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
292 struct rb_node *node = root->rb_node;
295 struct tag_node *data = rb_entry(node, struct tag_node, node);
297 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
298 " node=%p data=%p\n", tag, node, data);
299 result = tag_compare(tag, data->tag);
300 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
301 " data.tag=0x%llx (uid=%u) res=%d\n",
302 tag, data->tag, get_uid_from_tag(data->tag), result);
304 node = node->rb_left;
306 node = node->rb_right;
313 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
315 struct rb_node **new = &(root->rb_node), *parent = NULL;
317 /* Figure out where to put new node */
319 struct tag_node *this = rb_entry(*new, struct tag_node,
321 int result = tag_compare(data->tag, this->tag);
322 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
323 " (uid=%u)\n", __func__,
325 get_uid_from_tag(this->tag));
328 new = &((*new)->rb_left);
330 new = &((*new)->rb_right);
335 /* Add new node and rebalance tree. */
336 rb_link_node(&data->node, parent, new);
337 rb_insert_color(&data->node, root);
340 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
342 tag_node_tree_insert(&data->tn, root);
345 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
347 struct tag_node *node = tag_node_tree_search(root, tag);
350 return rb_entry(&node->node, struct tag_stat, tn.node);
353 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
354 struct rb_root *root)
356 tag_node_tree_insert(&data->tn, root);
359 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
362 struct tag_node *node = tag_node_tree_search(root, tag);
365 return rb_entry(&node->node, struct tag_counter_set, tn.node);
369 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
371 tag_node_tree_insert(&data->tn, root);
374 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
376 struct tag_node *node = tag_node_tree_search(root, tag);
379 return rb_entry(&node->node, struct tag_ref, tn.node);
382 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
383 const struct sock *sk)
385 struct rb_node *node = root->rb_node;
388 struct sock_tag *data = rb_entry(node, struct sock_tag,
391 node = node->rb_left;
392 else if (sk > data->sk)
393 node = node->rb_right;
400 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
402 struct rb_node **new = &(root->rb_node), *parent = NULL;
404 /* Figure out where to put new node */
406 struct sock_tag *this = rb_entry(*new, struct sock_tag,
409 if (data->sk < this->sk)
410 new = &((*new)->rb_left);
411 else if (data->sk > this->sk)
412 new = &((*new)->rb_right);
417 /* Add new node and rebalance tree. */
418 rb_link_node(&data->sock_node, parent, new);
419 rb_insert_color(&data->sock_node, root);
422 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
424 struct rb_node *node;
425 struct sock_tag *st_entry;
427 node = rb_first(st_to_free_tree);
429 st_entry = rb_entry(node, struct sock_tag, sock_node);
430 node = rb_next(node);
431 CT_DEBUG("qtaguid: %s(): "
432 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
435 get_uid_from_tag(st_entry->tag));
436 rb_erase(&st_entry->sock_node, st_to_free_tree);
437 sockfd_put(st_entry->socket);
442 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
445 struct rb_node *node = root->rb_node;
448 struct proc_qtu_data *data = rb_entry(node,
449 struct proc_qtu_data,
452 node = node->rb_left;
453 else if (pid > data->pid)
454 node = node->rb_right;
461 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
462 struct rb_root *root)
464 struct rb_node **new = &(root->rb_node), *parent = NULL;
466 /* Figure out where to put new node */
468 struct proc_qtu_data *this = rb_entry(*new,
469 struct proc_qtu_data,
472 if (data->pid < this->pid)
473 new = &((*new)->rb_left);
474 else if (data->pid > this->pid)
475 new = &((*new)->rb_right);
480 /* Add new node and rebalance tree. */
481 rb_link_node(&data->node, parent, new);
482 rb_insert_color(&data->node, root);
485 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
486 struct rb_root *root)
488 struct rb_node **new = &(root->rb_node), *parent = NULL;
490 /* Figure out where to put new node */
492 struct uid_tag_data *this = rb_entry(*new,
496 if (data->uid < this->uid)
497 new = &((*new)->rb_left);
498 else if (data->uid > this->uid)
499 new = &((*new)->rb_right);
504 /* Add new node and rebalance tree. */
505 rb_link_node(&data->node, parent, new);
506 rb_insert_color(&data->node, root);
509 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
512 struct rb_node *node = root->rb_node;
515 struct uid_tag_data *data = rb_entry(node,
519 node = node->rb_left;
520 else if (uid > data->uid)
521 node = node->rb_right;
529 * Allocates a new uid_tag_data struct if needed.
530 * Returns a pointer to the found or allocated uid_tag_data.
531 * Returns a PTR_ERR on failures, and lock is not held.
532 * If found is not NULL:
533 * sets *found to true if not allocated.
534 * sets *found to false if allocated.
536 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
538 struct uid_tag_data *utd_entry;
540 /* Look for top level uid_tag_data for the UID */
541 utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
542 DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
545 *found_res = utd_entry;
549 utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
551 pr_err("qtaguid: get_uid_data(%u): "
552 "tag data alloc failed\n", uid);
553 return ERR_PTR(-ENOMEM);
556 utd_entry->uid = uid;
557 utd_entry->tag_ref_tree = RB_ROOT;
558 uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
559 DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
563 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
564 static struct tag_ref *new_tag_ref(tag_t new_tag,
565 struct uid_tag_data *utd_entry)
567 struct tag_ref *tr_entry;
570 if (utd_entry->num_active_tags + 1 > max_sock_tags) {
571 pr_info("qtaguid: new_tag_ref(0x%llx): "
572 "tag ref alloc quota exceeded. max=%d\n",
573 new_tag, max_sock_tags);
579 tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
581 pr_err("qtaguid: new_tag_ref(0x%llx): "
582 "tag ref alloc failed\n",
587 tr_entry->tn.tag = new_tag;
588 /* tr_entry->num_sock_tags handled by caller */
589 utd_entry->num_active_tags++;
590 tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
591 DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
592 " inserted new tag ref %p\n",
600 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
601 struct uid_tag_data **utd_res)
603 struct uid_tag_data *utd_entry;
604 struct tag_ref *tr_entry;
606 uid_t uid = get_uid_from_tag(full_tag);
608 DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
611 utd_entry = get_uid_data(uid, &found_utd);
612 if (IS_ERR_OR_NULL(utd_entry)) {
614 *utd_res = utd_entry;
618 tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
620 *utd_res = utd_entry;
621 DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
622 full_tag, utd_entry, tr_entry);
626 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
627 static struct tag_ref *get_tag_ref(tag_t full_tag,
628 struct uid_tag_data **utd_res)
630 struct uid_tag_data *utd_entry;
631 struct tag_ref *tr_entry;
633 DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
635 spin_lock_bh(&uid_tag_data_tree_lock);
636 tr_entry = lookup_tag_ref(full_tag, &utd_entry);
637 BUG_ON(IS_ERR_OR_NULL(utd_entry));
639 tr_entry = new_tag_ref(full_tag, utd_entry);
641 spin_unlock_bh(&uid_tag_data_tree_lock);
643 *utd_res = utd_entry;
644 DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
645 full_tag, utd_entry, tr_entry);
649 /* Checks and maybe frees the UID Tag Data entry */
650 static void put_utd_entry(struct uid_tag_data *utd_entry)
652 /* Are we done with the UID tag data entry? */
653 if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
654 !utd_entry->num_pqd) {
655 DR_DEBUG("qtaguid: %s(): "
656 "erase utd_entry=%p uid=%u "
657 "by pid=%u tgid=%u uid=%u\n", __func__,
658 utd_entry, utd_entry->uid,
659 current->pid, current->tgid, current_fsuid());
660 BUG_ON(utd_entry->num_active_tags);
661 rb_erase(&utd_entry->node, &uid_tag_data_tree);
664 DR_DEBUG("qtaguid: %s(): "
665 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
666 __func__, utd_entry, utd_entry->num_active_tags,
668 BUG_ON(!(utd_entry->num_active_tags ||
669 utd_entry->num_pqd));
674 * If no sock_tags are using this tag_ref,
675 * decrements refcount of utd_entry, removes tr_entry
676 * from utd_entry->tag_ref_tree and frees.
678 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
679 struct uid_tag_data *utd_entry)
681 DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
682 tr_entry, tr_entry->tn.tag,
683 get_uid_from_tag(tr_entry->tn.tag));
684 if (!tr_entry->num_sock_tags) {
685 BUG_ON(!utd_entry->num_active_tags);
686 utd_entry->num_active_tags--;
687 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
688 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
693 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
695 struct rb_node *node;
696 struct tag_ref *tr_entry;
699 DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
700 full_tag, get_uid_from_tag(full_tag));
701 acct_tag = get_atag_from_tag(full_tag);
702 node = rb_first(&utd_entry->tag_ref_tree);
704 tr_entry = rb_entry(node, struct tag_ref, tn.node);
705 node = rb_next(node);
706 if (!acct_tag || tr_entry->tn.tag == full_tag)
707 free_tag_ref_from_utd_entry(tr_entry, utd_entry);
711 static int read_proc_u64(char *page, char **start, off_t off,
712 int count, int *eof, void *data)
717 uint64_t *iface_entry = data;
722 value = *iface_entry;
723 p += sprintf(p, "%llu\n", value);
724 len = (p - page) - off;
725 *eof = (len <= count) ? 1 : 0;
730 static int read_proc_bool(char *page, char **start, off_t off,
731 int count, int *eof, void *data)
736 bool *bool_entry = data;
742 p += sprintf(p, "%u\n", value);
743 len = (p - page) - off;
744 *eof = (len <= count) ? 1 : 0;
749 static int get_active_counter_set(tag_t tag)
752 struct tag_counter_set *tcs;
754 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
756 tag, get_uid_from_tag(tag));
757 /* For now we only handle UID tags for active sets */
758 tag = get_utag_from_tag(tag);
759 spin_lock_bh(&tag_counter_set_list_lock);
760 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
762 active_set = tcs->active_set;
763 spin_unlock_bh(&tag_counter_set_list_lock);
768 * Find the entry for tracking the specified interface.
769 * Caller must hold iface_stat_list_lock
771 static struct iface_stat *get_iface_entry(const char *ifname)
773 struct iface_stat *iface_entry;
775 /* Find the entry for tracking the specified tag within the interface */
776 if (ifname == NULL) {
777 pr_info("qtaguid: iface_stat: get() NULL device name\n");
781 /* Iterate over interfaces */
782 list_for_each_entry(iface_entry, &iface_stat_list, list) {
783 if (!strcmp(ifname, iface_entry->ifname))
/*
 * procfs read handler backing both /proc/.../iface_stat_all (fmt==1, no
 * header) and iface_stat_fmt (fmt==2, prints a format/header line first).
 * Dumps per-interface totals collected from the net_dev and from skbs.
 * NOTE(review): this excerpt is decimated — lines are missing between the
 * numbered fragments below; it is not compilable as-is.
 */
791 static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
792 off_t items_to_skip, int char_count,
793 int *eof, void *data)
798 int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
799 struct iface_stat *iface_entry;
800 struct rtnl_link_stats64 dev_stats, *stats;
801 struct rtnl_link_stats64 no_dev_stats = {0};
/* In passive mode the module reports nothing. */
803 if (unlikely(module_passive)) {
808 CT_DEBUG("qtaguid:proc iface_stat_fmt "
809 "pid=%u tgid=%u uid=%u "
810 "page=%p *num_items_returned=%p off=%ld "
811 "char_count=%d *eof=%d\n",
812 current->pid, current->tgid, current_fsuid(),
813 page, *num_items_returned,
814 items_to_skip, char_count, *eof);
/* fmt==2 emits a one-time header describing the columns. */
819 if (fmt == 2 && item_index++ >= items_to_skip) {
820 len = snprintf(outp, char_count,
822 "total_skb_rx_bytes total_skb_rx_packets "
823 "total_skb_tx_bytes total_skb_tx_packets\n"
825 if (len >= char_count) {
831 (*num_items_returned)++;
835 * This lock will prevent iface_stat_update() from changing active,
836 * and in turn prevent an interface from unregistering itself.
838 spin_lock_bh(&iface_stat_list_lock);
839 list_for_each_entry(iface_entry, &iface_stat_list, list) {
840 if (item_index++ < items_to_skip)
/* Inactive ifaces get zeroed dev stats instead of dev_get_stats(). */
843 if (iface_entry->active) {
844 stats = dev_get_stats(iface_entry->net_dev,
847 stats = &no_dev_stats;
850 * If the meaning of the data changes, then update the fmtX
857 "%llu %llu %llu %llu "
858 "%llu %llu %llu %llu\n",
861 iface_entry->totals_via_dev[IFS_RX].bytes,
862 iface_entry->totals_via_dev[IFS_RX].packets,
863 iface_entry->totals_via_dev[IFS_TX].bytes,
864 iface_entry->totals_via_dev[IFS_TX].packets,
865 stats->rx_bytes, stats->rx_packets,
866 stats->tx_bytes, stats->tx_packets
872 "%llu %llu %llu %llu\n",
874 iface_entry->totals_via_skb[IFS_RX].bytes,
875 iface_entry->totals_via_skb[IFS_RX].packets,
876 iface_entry->totals_via_skb[IFS_TX].bytes,
877 iface_entry->totals_via_skb[IFS_TX].packets
/* Output buffer exhausted: unlock and stop mid-list. */
880 if (len >= char_count) {
881 spin_unlock_bh(&iface_stat_list_lock);
887 (*num_items_returned)++;
889 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Deferred-work handler that creates the per-interface proc directory and
 * its read-only stat files. Runs from a workqueue because the addr
 * notifier paths that allocate the iface entry run in atomic context.
 * NOTE(review): this excerpt is decimated; lines are missing between the
 * numbered fragments below.
 */
895 static void iface_create_proc_worker(struct work_struct *work)
897 struct proc_dir_entry *proc_entry;
898 struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
900 struct iface_stat *new_iface = isw->iface_entry;
902 /* iface_entries are not deleted, so safe to manipulate. */
903 proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
904 if (IS_ERR_OR_NULL(proc_entry)) {
905 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
910 new_iface->proc_ptr = proc_entry;
/* Each file exposes one field of totals_via_dev via read_proc_u64. */
912 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
914 &new_iface->totals_via_dev[IFS_TX].bytes);
915 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
917 &new_iface->totals_via_dev[IFS_RX].bytes);
918 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
920 &new_iface->totals_via_dev[IFS_TX].packets);
921 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
923 &new_iface->totals_via_dev[IFS_RX].packets);
924 create_proc_read_entry("active", proc_iface_perms, proc_entry,
925 read_proc_bool, &new_iface->active);
927 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
928 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
/*
 * NOTE(review): decimated excerpt; branch keywords and the "activate"
 * parameter line are missing between the numbered fragments below.
 * On activate: record the net_dev and mark the entry active; on
 * deactivate: clear both. Caller must hold iface_stat_list_lock.
 */
933 * Will set the entry's active state, and
934 * update the net_dev accordingly also.
936 static void _iface_stat_set_active(struct iface_stat *entry,
937 struct net_device *net_dev,
941 entry->net_dev = net_dev;
942 entry->active = true;
943 IF_DEBUG("qtaguid: %s(%s): "
944 "enable tracking. rfcnt=%d\n", __func__,
946 __this_cpu_read(*net_dev->pcpu_refcnt));
948 entry->active = false;
949 entry->net_dev = NULL;
950 IF_DEBUG("qtaguid: %s(%s): "
951 "disable tracking. rfcnt=%d\n", __func__,
953 __this_cpu_read(*net_dev->pcpu_refcnt));
/*
 * Allocate and register a new iface_stat for @net_dev (GFP_ATOMIC: called
 * from notifier context), mark it active, and defer proc-entry creation
 * to a workqueue. Returns the new entry, or (presumably) NULL on failure
 * — the error-path return lines are missing from this excerpt.
 * NOTE(review): decimated; cleanup branches (kfree of new_iface) are not
 * all visible.
 */
958 /* Caller must hold iface_stat_list_lock */
959 static struct iface_stat *iface_alloc(struct net_device *net_dev)
961 struct iface_stat *new_iface;
962 struct iface_stat_work *isw;
964 new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
965 if (new_iface == NULL) {
966 pr_err("qtaguid: iface_stat: create(%s): "
967 "iface_stat alloc failed\n", net_dev->name);
970 new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
971 if (new_iface->ifname == NULL) {
972 pr_err("qtaguid: iface_stat: create(%s): "
973 "ifname alloc failed\n", net_dev->name);
977 spin_lock_init(&new_iface->tag_stat_list_lock);
978 new_iface->tag_stat_tree = RB_ROOT;
979 _iface_stat_set_active(new_iface, net_dev, true);
982 * ipv6 notifier chains are atomic :(
983 * No create_proc_read_entry() for you!
985 isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
987 pr_err("qtaguid: iface_stat: create(%s): "
988 "work alloc failed\n", new_iface->ifname);
/* Roll back activation and free the name on work-alloc failure. */
989 _iface_stat_set_active(new_iface, net_dev, false);
990 kfree(new_iface->ifname);
994 isw->iface_entry = new_iface;
995 INIT_WORK(&isw->iface_work, iface_create_proc_worker);
996 schedule_work(&isw->iface_work);
997 list_add(&new_iface->list, &iface_stat_list);
/*
 * Detect an unexpected reset of a device's byte counters (current dev
 * stats lower than the last stashed values) and, if so, fold the stashed
 * last_known totals into totals_via_dev so accumulated usage is not lost.
 * NOTE(review): decimated excerpt; stats_rewound declaration and some
 * argument lines are missing between the numbered fragments below.
 */
1001 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
1002 struct iface_stat *iface)
1004 struct rtnl_link_stats64 dev_stats, *stats;
1007 stats = dev_get_stats(net_dev, &dev_stats);
1008 /* No empty packets */
/* "Rewound" means the device counters went backwards vs. last_known. */
1010 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
1011 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
1013 IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
1014 "bytes rx/tx=%llu/%llu "
1015 "active=%d last_known=%d "
1016 "stats_rewound=%d\n", __func__,
1017 net_dev ? net_dev->name : "?",
1019 stats->rx_bytes, stats->tx_bytes,
1020 iface->active, iface->last_known_valid, stats_rewound);
1022 if (iface->active && iface->last_known_valid && stats_rewound) {
1023 pr_warn_once("qtaguid: iface_stat: %s(%s): "
1024 "iface reset its stats unexpectedly\n", __func__,
/* Preserve prior usage by adding the stashed snapshot to the totals. */
1027 iface->totals_via_dev[IFS_TX].bytes +=
1028 iface->last_known[IFS_TX].bytes;
1029 iface->totals_via_dev[IFS_TX].packets +=
1030 iface->last_known[IFS_TX].packets;
1031 iface->totals_via_dev[IFS_RX].bytes +=
1032 iface->last_known[IFS_RX].bytes;
1033 iface->totals_via_dev[IFS_RX].packets +=
1034 iface->last_known[IFS_RX].packets;
1035 iface->last_known_valid = false;
1036 IF_DEBUG("qtaguid: %s(%s): iface=%p "
1037 "used last known bytes rx/tx=%llu/%llu\n", __func__,
1038 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
1039 iface->last_known[IFS_TX].bytes);
/*
 * NOTE(review): decimated excerpt; several declarations (ifname, ipaddr)
 * and branch lines are missing between the numbered fragments below.
 */
1044 * Create a new entry for tracking the specified interface.
1045 * Do nothing if the entry already exists.
1046 * Called when an interface is configured with a valid IP address.
1048 static void iface_stat_create(struct net_device *net_dev,
1049 struct in_ifaddr *ifa)
1051 struct in_device *in_dev = NULL;
1053 struct iface_stat *entry;
1055 struct iface_stat *new_iface;
1057 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1058 net_dev ? net_dev->name : "?",
1061 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1065 ifname = net_dev->name;
/* When no ifa is passed, find one whose label matches the device name. */
1067 in_dev = in_dev_get(net_dev);
1069 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1073 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1075 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1076 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1077 "ifa=%p ifa_label=%s\n",
1079 ifa->ifa_label ? ifa->ifa_label : "(null)");
1080 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1086 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1090 ipaddr = ifa->ifa_local;
1092 spin_lock_bh(&iface_stat_list_lock);
1093 entry = get_iface_entry(ifname);
/* Existing entry: re-activate (unless loopback) after reset check. */
1094 if (entry != NULL) {
1095 bool activate = !ipv4_is_loopback(ipaddr);
1096 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1098 iface_check_stats_reset_and_adjust(net_dev, entry);
1099 _iface_stat_set_active(entry, net_dev, activate);
1100 IF_DEBUG("qtaguid: %s(%s): "
1101 "tracking now %d on ip=%pI4\n", __func__,
1102 entry->ifname, activate, &ipaddr);
1103 goto done_unlock_put;
1104 } else if (ipv4_is_loopback(ipaddr)) {
1105 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1106 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1107 goto done_unlock_put;
1110 new_iface = iface_alloc(net_dev);
1111 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1112 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1114 spin_unlock_bh(&iface_stat_list_lock);
/*
 * IPv6 twin of iface_stat_create(): track (or re-activate) an interface
 * when it gains a valid IPv6 address, skipping loopback addresses.
 * NOTE(review): decimated excerpt; declarations (ifname, addr_type) and
 * some branch lines are missing between the numbered fragments below.
 */
1120 static void iface_stat_create_ipv6(struct net_device *net_dev,
1121 struct inet6_ifaddr *ifa)
1123 struct in_device *in_dev;
1125 struct iface_stat *entry;
1126 struct iface_stat *new_iface;
1129 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1130 ifa, net_dev, net_dev ? net_dev->name : "");
1132 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1135 ifname = net_dev->name;
1137 in_dev = in_dev_get(net_dev);
1139 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1144 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1148 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1152 addr_type = ipv6_addr_type(&ifa->addr);
1154 spin_lock_bh(&iface_stat_list_lock);
1155 entry = get_iface_entry(ifname);
/* Existing entry: re-activate (unless loopback) after reset check. */
1156 if (entry != NULL) {
1157 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1158 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1160 iface_check_stats_reset_and_adjust(net_dev, entry);
1161 _iface_stat_set_active(entry, net_dev, activate);
1162 IF_DEBUG("qtaguid: %s(%s): "
1163 "tracking now %d on ip=%pI6c\n", __func__,
1164 entry->ifname, activate, &ifa->addr);
1165 goto done_unlock_put;
1166 } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1167 IF_DEBUG("qtaguid: %s(%s): "
1168 "ignore loopback dev. ip=%pI6c\n", __func__,
1169 ifname, &ifa->addr);
1170 goto done_unlock_put;
1173 new_iface = iface_alloc(net_dev);
1174 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1175 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1178 spin_unlock_bh(&iface_stat_list_lock);
1183 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1185 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1186 return sock_tag_tree_search(&sock_tag_tree, sk);
1189 static struct sock_tag *get_sock_stat(const struct sock *sk)
1191 struct sock_tag *sock_tag_entry;
1192 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1195 spin_lock_bh(&sock_tag_list_lock);
1196 sock_tag_entry = get_sock_stat_nl(sk);
1197 spin_unlock_bh(&sock_tag_list_lock);
1198 return sock_tag_entry;
1201 static int ipx_proto(const struct sk_buff *skb,
1202 struct xt_action_param *par)
1204 int thoff = 0, tproto;
1206 switch (par->family) {
1208 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1210 MT_DEBUG("%s(): transport header not found in ipv6"
1211 " skb=%p\n", __func__, skb);
1214 tproto = ip_hdr(skb)->protocol;
1217 tproto = IPPROTO_RAW;
1223 data_counters_update(struct data_counters *dc, int set,
1224 enum ifs_tx_rx direction, int proto, int bytes)
1228 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1231 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1235 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
/*
 * NOTE(review): decimated excerpt; some branch/return lines are missing
 * between the numbered fragments below.
 */
1242 * Update stats for the specified interface. Do nothing if the entry
1243 * does not exist (when a device was never configured with an IP address).
1244 * Called when an device is being unregistered.
1246 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1248 struct rtnl_link_stats64 dev_stats, *stats;
1249 struct iface_stat *entry;
1251 stats = dev_get_stats(net_dev, &dev_stats);
1252 spin_lock_bh(&iface_stat_list_lock);
1253 entry = get_iface_entry(net_dev->name);
1254 if (entry == NULL) {
1255 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1257 spin_unlock_bh(&iface_stat_list_lock);
1261 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1262 net_dev->name, entry);
1263 if (!entry->active) {
1264 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1266 spin_unlock_bh(&iface_stat_list_lock);
/* stash_only: snapshot current dev stats for later reset detection. */
1271 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1272 entry->last_known[IFS_TX].packets = stats->tx_packets;
1273 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1274 entry->last_known[IFS_RX].packets = stats->rx_packets;
1275 entry->last_known_valid = true;
1276 IF_DEBUG("qtaguid: %s(%s): "
1277 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1278 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1279 spin_unlock_bh(&iface_stat_list_lock);
/* Full update: fold dev stats into totals and deactivate tracking. */
1282 entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1283 entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1284 entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1285 entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1286 /* We don't need the last_known[] anymore */
1287 entry->last_known_valid = false;
1288 _iface_stat_set_active(entry, net_dev, false);
1289 IF_DEBUG("qtaguid: %s(%s): "
1290 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1291 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1292 spin_unlock_bh(&iface_stat_list_lock);
/*
 * NOTE(review): decimated excerpt; some branch lines are missing between
 * the numbered fragments below.
 */
1296 * Update stats for the specified interface from the skb.
1297 * Do nothing if the entry
1298 * does not exist (when a device was never configured with an IP address).
1299 * Called on each sk.
1301 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1302 struct xt_action_param *par)
1304 struct iface_stat *entry;
1305 const struct net_device *el_dev;
/* A non-NULL par->in means this hook saw the packet on receive. */
1306 enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1307 int bytes = skb->len;
1310 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1311 el_dev = par->in ? : par->out;
1313 const struct net_device *other_dev;
1315 other_dev = par->in ? : par->out;
/* Debug-only: note when skb->dev disagrees with the hook's device. */
1316 if (el_dev != other_dev) {
1317 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1318 "par->(in/out)=%p %s\n",
1319 par->hooknum, el_dev, el_dev->name, other_dev,
1324 if (unlikely(!el_dev)) {
1325 pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
1326 par->hooknum, __func__);
1328 } else if (unlikely(!el_dev->name)) {
1329 pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
1330 par->hooknum, __func__);
1333 int proto = ipx_proto(skb, par);
1334 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1335 par->hooknum, el_dev->name, el_dev->type,
1336 par->family, proto);
1339 spin_lock_bh(&iface_stat_list_lock);
1340 entry = get_iface_entry(el_dev->name);
1341 if (entry == NULL) {
1342 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1343 __func__, el_dev->name);
1344 spin_unlock_bh(&iface_stat_list_lock);
1348 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1349 el_dev->name, entry);
/* skb-derived totals are kept separately from dev_get_stats() totals. */
1351 entry->totals_via_skb[direction].bytes += bytes;
1352 entry->totals_via_skb[direction].packets++;
1353 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Update the data counters of a tag_stat entry (and its parent's, if any)
 * for one packet, using the tag's currently-active counter set.
 * NOTE(review): the declaration of active_set is among the elided lines.
 */
1356 static void tag_stat_update(struct tag_stat *tag_entry,
1357 enum ifs_tx_rx direction, int proto, int bytes)
1360 active_set = get_active_counter_set(tag_entry->tn.tag);
1361 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1362 "dir=%d proto=%d bytes=%d)\n",
1363 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1364 active_set, direction, proto, bytes);
1365 data_counters_update(&tag_entry->counters, active_set, direction,
/* A child {acct_tag, uid_tag} also feeds the parent {0, uid_tag} counters. */
1367 if (tag_entry->parent_counters)
1368 data_counters_update(tag_entry->parent_counters, active_set,
1369 direction, proto, bytes);
1373 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1375 * iface_entry->tag_stat_list_lock should be held.
/*
 * Returns the new tag_stat inserted into iface_entry->tag_stat_tree, or
 * NULL on allocation failure. GFP_ATOMIC because the caller holds a
 * BH-disabled spinlock.
 */
1377 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1380 struct tag_stat *new_tag_stat_entry = NULL;
1381 IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1382 " (uid=%u)\n", __func__,
1383 iface_entry, tag, get_uid_from_tag(tag));
1384 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1385 if (!new_tag_stat_entry) {
1386 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1389 new_tag_stat_entry->tn.tag = tag;
1390 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1392 return new_tag_stat_entry;
/*
 * Account one packet against the {acct_tag, uid_tag} entry for this iface,
 * creating the {0, uid_tag} parent and/or the child entry on demand.
 * If the socket was explicitly tagged, its tag is used; otherwise a
 * zero-acct_tag tag derived from the uid is used.
 * NOTE(review): several lines (closing braces, early returns) are elided
 * from this extraction; comments describe only the visible flow.
 */
1395 static void if_tag_stat_update(const char *ifname, uid_t uid,
1396 const struct sock *sk, enum ifs_tx_rx direction,
1397 int proto, int bytes)
1399 struct tag_stat *tag_stat_entry;
1400 tag_t tag, acct_tag;
1402 struct data_counters *uid_tag_counters;
1403 struct sock_tag *sock_tag_entry;
1404 struct iface_stat *iface_entry;
1405 struct tag_stat *new_tag_stat = NULL;
1406 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1407 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1408 ifname, uid, sk, direction, proto, bytes);
1411 iface_entry = get_iface_entry(ifname);
1413 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1417 /* It is ok to process data when an iface_entry is inactive */
1419 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1420 ifname, iface_entry);
1423 * Look for a tagged sock.
1424 * It will have an acct_uid.
1426 sock_tag_entry = get_sock_stat(sk);
1427 if (sock_tag_entry) {
1428 tag = sock_tag_entry->tag;
1429 acct_tag = get_atag_from_tag(tag);
1430 uid_tag = get_utag_from_tag(tag);
/* Untagged socket: fall back to a pure-uid tag (acct_tag == 0). */
1432 acct_tag = make_atag_from_value(0);
1433 tag = combine_atag_with_uid(acct_tag, uid);
1434 uid_tag = make_tag_from_uid(uid);
1436 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1437 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1438 tag, get_uid_from_tag(tag), iface_entry);
1439 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1440 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1442 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1444 if (tag_stat_entry) {
1446 * Updating the {acct_tag, uid_tag} entry handles both stats:
1447 * {0, uid_tag} will also get updated.
1449 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1450 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1454 /* Loop over tag list under this interface for {0,uid_tag} */
1455 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1457 if (!tag_stat_entry) {
1458 /* Here: the base uid_tag did not exist */
1460 * No parent counters. So
1461 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1463 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1464 uid_tag_counters = &new_tag_stat->counters;
1466 uid_tag_counters = &tag_stat_entry->counters;
1470 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1471 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1472 new_tag_stat->parent_counters = uid_tag_counters;
1475 * For new_tag_stat to be still NULL here would require:
1476 * {0, uid_tag} exists
1477 * and {acct_tag, uid_tag} doesn't exist
1478 * AND acct_tag == 0.
1479 * Impossible. This reassures us that new_tag_stat
1480 * below will always be assigned.
1482 BUG_ON(!new_tag_stat);
1484 tag_stat_update(new_tag_stat, direction, proto, bytes);
1485 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
/*
 * Netdevice notifier: creates iface stat entries on register-type events
 * and folds/stashes stats on DOWN/UNREGISTER. Inert when module_passive.
 * NOTE(review): the switch statement and case labels preceding
 * iface_stat_create() are among the elided lines.
 */
1488 static int iface_netdev_event_handler(struct notifier_block *nb,
1489 unsigned long event, void *ptr) {
1490 struct net_device *dev = ptr;
1492 if (unlikely(module_passive))
1495 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1496 "ev=0x%lx/%s netdev=%p->name=%s\n",
1497 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1501 iface_stat_create(dev, NULL);
1502 atomic64_inc(&qtu_events.iface_events);
1505 case NETDEV_UNREGISTER:
/* On NETDEV_DOWN only stash the counters; on UNREGISTER fold and disable. */
1506 iface_stat_update(dev, event == NETDEV_DOWN);
1507 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv6 address notifier: mirrors the netdev handler but resolves the
 * net_device from the inet6_ifaddr. Inert when module_passive.
 */
1513 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1514 unsigned long event, void *ptr)
1516 struct inet6_ifaddr *ifa = ptr;
1517 struct net_device *dev;
1519 if (unlikely(module_passive))
1522 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1523 "ev=0x%lx/%s ifa=%p\n",
1524 event, netdev_evt_str(event), ifa);
1528 BUG_ON(!ifa || !ifa->idev);
1529 dev = (struct net_device *)ifa->idev->dev;
1530 iface_stat_create_ipv6(dev, ifa);
1531 atomic64_inc(&qtu_events.iface_events);
1534 case NETDEV_UNREGISTER:
1535 BUG_ON(!ifa || !ifa->idev);
1536 dev = (struct net_device *)ifa->idev->dev;
/* event == NETDEV_DOWN selects stash-only mode in iface_stat_update(). */
1537 iface_stat_update(dev, event == NETDEV_DOWN);
1538 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv4 address notifier: same shape as the IPv6 handler, resolving the
 * net_device through ifa->ifa_dev. Inert when module_passive.
 */
1544 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1545 unsigned long event, void *ptr)
1547 struct in_ifaddr *ifa = ptr;
1548 struct net_device *dev;
1550 if (unlikely(module_passive))
1553 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1554 "ev=0x%lx/%s ifa=%p\n",
1555 event, netdev_evt_str(event), ifa);
1559 BUG_ON(!ifa || !ifa->ifa_dev);
1560 dev = ifa->ifa_dev->dev;
1561 iface_stat_create(dev, ifa);
1562 atomic64_inc(&qtu_events.iface_events);
1565 case NETDEV_UNREGISTER:
1566 BUG_ON(!ifa || !ifa->ifa_dev);
1567 dev = ifa->ifa_dev->dev;
/* event == NETDEV_DOWN selects stash-only mode in iface_stat_update(). */
1568 iface_stat_update(dev, event == NETDEV_DOWN);
1569 atomic64_inc(&qtu_events.iface_events);
/* Notifier blocks registered in iface_stat_init() below. */
1575 static struct notifier_block iface_netdev_notifier_blk = {
1576 .notifier_call = iface_netdev_event_handler,
1579 static struct notifier_block iface_inetaddr_notifier_blk = {
1580 .notifier_call = iface_inetaddr_event_handler,
1583 static struct notifier_block iface_inet6addr_notifier_blk = {
1584 .notifier_call = iface_inet6addr_event_handler,
/*
 * Create the iface_stat procfs entries and register the netdev/inet/inet6
 * notifiers. Uses goto-based cleanup so each failure unwinds exactly what
 * was set up. NOTE(review): some error-path lines (err assignments, a
 * couple of goto labels) are elided in this extraction.
 */
1587 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1591 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1592 if (!iface_stat_procdir) {
1593 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
/* Legacy (fmt1) all-ifaces stats file. */
1598 iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1601 if (!iface_stat_all_procfile) {
1602 pr_err("qtaguid: iface_stat: init "
1603 " failed to create stat_old proc entry\n");
1607 iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
1608 iface_stat_all_procfile->data = (void *)1; /* fmt1 */
/* Newer (fmt2) stats file; same reader, different format selector. */
1610 iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
1613 if (!iface_stat_fmt_procfile) {
1614 pr_err("qtaguid: iface_stat: init "
1615 " failed to create stat_all proc entry\n");
1617 goto err_zap_all_stats_entry;
1619 iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
1620 iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
1623 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1625 pr_err("qtaguid: iface_stat: init "
1626 "failed to register dev event handler\n");
1627 goto err_zap_all_stats_entries;
1629 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1631 pr_err("qtaguid: iface_stat: init "
1632 "failed to register ipv4 dev event handler\n");
1636 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1638 pr_err("qtaguid: iface_stat: init "
1639 "failed to register ipv6 dev event handler\n");
1640 goto err_unreg_ip4_addr;
/* Unwind in strict reverse order of setup. */
1645 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1647 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1648 err_zap_all_stats_entries:
1649 remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1650 err_zap_all_stats_entry:
1651 remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1653 remove_proc_entry(iface_stat_procdirname, parent_procdir);
/*
 * Locate the struct sock owning this skb via the xt_socket helpers.
 * Only attempted in PRE_ROUTING/LOCAL_IN (XT_SOCKET_SUPPORTED_HOOKS) to
 * avoid bogus results from other hooks. TCP_TIME_WAIT sockets are put
 * back and treated as not found (see comment re: file ptr issues).
 * NOTE(review): the return statements are among the elided lines.
 */
1658 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1659 struct xt_action_param *par)
1662 unsigned int hook_mask = (1 << par->hooknum);
1664 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1665 par->hooknum, par->family);
1668 * Let's not abuse the the xt_socket_get*_sk(), or else it will
1669 * return garbage SKs.
1671 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1674 switch (par->family) {
1676 sk = xt_socket_get6_sk(skb, par);
1679 sk = xt_socket_get4_sk(skb, par);
1686 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1687 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1688 * Not fixed in 3.0-r3 :(
1691 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1692 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1693 if (sk->sk_state == TCP_TIME_WAIT) {
/* xt_socket_get*_sk() took a reference; release it before discarding. */
1694 xt_socket_put_sk(sk);
/*
 * Charge this skb's bytes to the given uid on the device it traversed.
 * alternate_sk is used when skb->sk is NULL (e.g. sk found via conntrack).
 * Device selection mirrors iface_stat_update_from_skb().
 */
1701 static void account_for_uid(const struct sk_buff *skb,
1702 const struct sock *alternate_sk, uid_t uid,
1703 struct xt_action_param *par)
1705 const struct net_device *el_dev;
1708 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1709 el_dev = par->in ? : par->out;
1711 const struct net_device *other_dev;
1713 other_dev = par->in ? : par->out;
1714 if (el_dev != other_dev) {
1715 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1716 "par->(in/out)=%p %s\n",
1717 par->hooknum, el_dev, el_dev->name, other_dev,
1722 if (unlikely(!el_dev)) {
1723 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1724 } else if (unlikely(!el_dev->name)) {
1725 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1727 int proto = ipx_proto(skb, par);
1728 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1729 par->hooknum, el_dev->name, el_dev->type,
1730 par->family, proto);
/* Prefer the skb's own socket; fall back to the caller-supplied one. */
1732 if_tag_stat_update(el_dev->name, uid,
1733 skb->sk ? skb->sk : alternate_sk,
1734 par->in ? IFS_RX : IFS_TX,
/*
 * The xt_qtaguid match function: accounts traffic per iface/uid/tag and
 * evaluates the uid/gid/socket match conditions from the rule's matchinfo.
 * NOTE(review): this extraction dropped many interior lines (declarations
 * of sk/res/sock_uid, several braces/returns); comments below annotate
 * only the visible statements.
 */
1739 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1741 const struct xt_qtaguid_match_info *info = par->matchinfo;
1742 const struct file *filp;
1743 bool got_sock = false;
/* Passive mode: skip all accounting and just answer the match question. */
1748 if (unlikely(module_passive))
1749 return (info->match ^ info->invert) == 0;
1751 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1752 par->hooknum, skb, par->in, par->out, par->family);
1754 atomic64_inc(&qtu_events.match_calls);
1756 res = (info->match ^ info->invert) == 0;
1760 switch (par->hooknum) {
1761 case NF_INET_PRE_ROUTING:
1762 case NF_INET_POST_ROUTING:
/* pre/post routing: only iface-level skb accounting, no UID work. */
1763 atomic64_inc(&qtu_events.match_calls_prepost);
1764 iface_stat_update_from_skb(skb, par);
1766 * We are done in pre/post. The skb will get processed
1769 res = (info->match ^ info->invert);
1772 /* default: Fall through and do UID releated work */
1778 * A missing sk->sk_socket happens when packets are in-flight
1779 * and the matching socket is already closed and gone.
1781 sk = qtaguid_find_sk(skb, par);
1783 * If we got the socket from the find_sk(), we will need to put
1784 * it back, as nf_tproxy_get_sock_v4() got it.
1788 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1790 atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1792 atomic64_inc(&qtu_events.match_found_sk);
1794 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1795 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1797 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1798 par->hooknum, sk, sk->sk_socket,
1799 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1800 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1801 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1802 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1805 if (sk == NULL || sk->sk_socket == NULL) {
1807 * Here, the qtaguid_find_sk() using connection tracking
1808 * couldn't find the owner, so for now we just count them
1809 * against the system.
1812 * TODO: unhack how to force just accounting.
1813 * For now we only do iface stats when the uid-owner is not
1816 if (!(info->match & XT_QTAGUID_UID))
/* uid 0 == charge to "system" when no owner can be determined. */
1817 account_for_uid(skb, sk, 0, par);
1818 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1820 sk ? sk->sk_socket : NULL);
1821 res = (info->match ^ info->invert) == 0;
1822 atomic64_inc(&qtu_events.match_no_sk);
1823 goto put_sock_ret_res;
1824 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1826 goto put_sock_ret_res;
1828 filp = sk->sk_socket->file;
1830 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1831 account_for_uid(skb, sk, 0, par);
1832 res = ((info->match ^ info->invert) &
1833 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1834 atomic64_inc(&qtu_events.match_no_sk_file);
1835 goto put_sock_ret_res;
1837 sock_uid = filp->f_cred->fsuid;
1839 * TODO: unhack how to force just accounting.
1840 * For now we only do iface stats when the uid-owner is not requested
1842 if (!(info->match & XT_QTAGUID_UID))
1843 account_for_uid(skb, sk, sock_uid, par);
1846 * The following two tests fail the match when:
1847 * id not in range AND no inverted condition requested
1848 * or id in range AND inverted condition requested
1849 * Thus (!a && b) || (a && !b) == a ^ b
1851 if (info->match & XT_QTAGUID_UID)
1852 if ((filp->f_cred->fsuid >= info->uid_min &&
1853 filp->f_cred->fsuid <= info->uid_max) ^
1854 !(info->invert & XT_QTAGUID_UID)) {
1855 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1858 goto put_sock_ret_res;
1860 if (info->match & XT_QTAGUID_GID)
1861 if ((filp->f_cred->fsgid >= info->gid_min &&
1862 filp->f_cred->fsgid <= info->gid_max) ^
1863 !(info->invert & XT_QTAGUID_GID)) {
1864 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1867 goto put_sock_ret_res;
1870 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
/* Release the ref taken by qtaguid_find_sk() (got_sock path). */
1875 xt_socket_put_sk(sk);
1877 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1882 /* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the full module state (sock tags, uid tag data, proc qtu data,
 * iface stats) to the kernel log when DDEBUG_MASK is enabled, taking each
 * subsystem's lock around its dump. Compiled to a no-op stub otherwise
 * (second definition below). NOTE(review): the #ifdef/#else lines and the
 * kfree()s for the kasprintf buffers are among the elided lines.
 */
1883 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1889 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1892 fmt_buff = kasprintf(GFP_ATOMIC,
1893 "qtaguid: %s(): %s {\n", __func__, fmt);
1895 va_start(args, fmt);
1896 buff = kvasprintf(GFP_ATOMIC,
1899 pr_debug("%s", buff);
1904 spin_lock_bh(&sock_tag_list_lock);
1905 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1906 spin_unlock_bh(&sock_tag_list_lock);
/* sock_tag_list_lock is taken before uid_tag_data_tree_lock (lock order). */
1908 spin_lock_bh(&sock_tag_list_lock);
1909 spin_lock_bh(&uid_tag_data_tree_lock);
1910 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1911 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1912 spin_unlock_bh(&uid_tag_data_tree_lock);
1913 spin_unlock_bh(&sock_tag_list_lock);
1915 spin_lock_bh(&iface_stat_list_lock);
1916 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1917 spin_unlock_bh(&iface_stat_list_lock);
1919 pr_debug("qtaguid: %s(): }\n", __func__);
/* Stub used when the debug dump is compiled out. */
1922 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1926 * Procfs reader to get all active socket tags using style "1)" as described in
/*
 * Legacy read_proc-style handler: emits one line per tagged socket, then a
 * final "events:" summary line, honoring items_to_skip/char_count paging.
 * NOTE(review): the outp/len bookkeeping lines between snprintf calls are
 * among the elided lines in this extraction.
 */
1929 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1930 off_t items_to_skip, int char_count, int *eof,
1936 struct rb_node *node;
1937 struct sock_tag *sock_tag_entry;
1939 int indent_level = 0;
1942 if (unlikely(module_passive)) {
1950 CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
1951 "page=%p off=%ld char_count=%d *eof=%d\n",
1952 current->pid, current->tgid, current_fsuid(),
1953 page, items_to_skip, char_count, *eof);
1955 spin_lock_bh(&sock_tag_list_lock);
1956 for (node = rb_first(&sock_tag_tree);
1958 node = rb_next(node)) {
/* Paging: skip items the reader has already consumed. */
1959 if (item_index++ < items_to_skip)
1961 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1962 uid = get_uid_from_tag(sock_tag_entry->tag);
1963 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1966 sock_tag_entry->tag,
1970 f_count = atomic_long_read(
1971 &sock_tag_entry->socket->file->f_count);
1972 len = snprintf(outp, char_count,
1973 "sock=%p tag=0x%llx (uid=%u) pid=%u "
1976 sock_tag_entry->tag, uid,
1977 sock_tag_entry->pid, f_count);
/* Buffer full: stop here; the reader will come back with a new offset. */
1978 if (len >= char_count) {
1979 spin_unlock_bh(&sock_tag_list_lock);
1985 (*num_items_returned)++;
1987 spin_unlock_bh(&sock_tag_list_lock);
/* Trailing summary line with all event counters, also paged. */
1989 if (item_index++ >= items_to_skip) {
1990 len = snprintf(outp, char_count,
1991 "events: sockets_tagged=%llu "
1992 "sockets_untagged=%llu "
1993 "counter_set_changes=%llu "
1995 "iface_events=%llu "
1997 "match_calls_prepost=%llu "
1998 "match_found_sk=%llu "
1999 "match_found_sk_in_ct=%llu "
2000 "match_found_no_sk_in_ct=%llu "
2002 "match_no_sk_file=%llu\n",
2003 atomic64_read(&qtu_events.sockets_tagged),
2004 atomic64_read(&qtu_events.sockets_untagged),
2005 atomic64_read(&qtu_events.counter_set_changes),
2006 atomic64_read(&qtu_events.delete_cmds),
2007 atomic64_read(&qtu_events.iface_events),
2008 atomic64_read(&qtu_events.match_calls),
2009 atomic64_read(&qtu_events.match_calls_prepost),
2010 atomic64_read(&qtu_events.match_found_sk),
2011 atomic64_read(&qtu_events.match_found_sk_in_ct),
2013 &qtu_events.match_found_no_sk_in_ct),
2014 atomic64_read(&qtu_events.match_no_sk),
2015 atomic64_read(&qtu_events.match_no_sk_file));
2016 if (len >= char_count) {
2022 (*num_items_returned)++;
2025 /* Count the following as part of the last item_index */
2026 if (item_index > items_to_skip) {
2027 prdebug_full_state(indent_level, "proc ctrl");
2035 * Delete socket tags, and stat tags associated with a given
2036 * accouting tag and uid.
/*
 * Input format: "d <acct_tag> [<uid>]". If uid is omitted it defaults to
 * the caller's fsuid; impersonating another uid requires privilege.
 * An acct_tag of 0 deletes everything belonging to the uid. Walks, in
 * order: sock_tag_tree, tag_counter_set_tree, every iface's tag_stat_tree,
 * then uid_tag_data_tree — each under its own lock.
 * NOTE(review): several lines (argc validation, some braces/continues)
 * are elided in this extraction.
 */
2038 static int ctrl_cmd_delete(const char *input)
2046 struct iface_stat *iface_entry;
2047 struct rb_node *node;
2048 struct sock_tag *st_entry;
2049 struct rb_root st_to_free_tree = RB_ROOT;
2050 struct tag_stat *ts_entry;
2051 struct tag_counter_set *tcs_entry;
2052 struct tag_ref *tr_entry;
2053 struct uid_tag_data *utd_entry;
2055 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
2056 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2057 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2063 if (!valid_atag(acct_tag)) {
2064 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2069 uid = current_fsuid();
2070 } else if (!can_impersonate_uid(uid)) {
2071 pr_info("qtaguid: ctrl_delete(%s): "
2072 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2073 input, current->pid, current->tgid, current_fsuid());
2078 tag = combine_atag_with_uid(acct_tag, uid);
2079 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2080 "looking for tag=0x%llx (uid=%u)\n",
2083 /* Delete socket tags */
2084 spin_lock_bh(&sock_tag_list_lock);
2085 node = rb_first(&sock_tag_tree);
2087 st_entry = rb_entry(node, struct sock_tag, sock_node);
2088 entry_uid = get_uid_from_tag(st_entry->tag);
/* Advance before a potential rb_erase() invalidates this node. */
2089 node = rb_next(node);
2090 if (entry_uid != uid)
2093 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2094 input, st_entry->tag, entry_uid);
2096 if (!acct_tag || st_entry->tag == tag) {
2097 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2098 /* Can't sockfd_put() within spinlock, do it later. */
2099 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2100 tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2101 BUG_ON(tr_entry->num_sock_tags <= 0);
2102 tr_entry->num_sock_tags--;
2104 * TODO: remove if, and start failing.
2105 * This is a hack to work around the fact that in some
2106 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2107 * and are trying to work around apps
2108 * that didn't open the /dev/xt_qtaguid.
2110 if (st_entry->list.next && st_entry->list.prev)
2111 list_del(&st_entry->list);
2114 spin_unlock_bh(&sock_tag_list_lock);
/* Now safe to drop socket refs collected above (no spinlock held). */
2116 sock_tag_tree_erase(&st_to_free_tree);
2118 /* Delete tag counter-sets */
2119 spin_lock_bh(&tag_counter_set_list_lock);
2120 /* Counter sets are only on the uid tag, not full tag */
2121 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2123 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2124 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2127 get_uid_from_tag(tcs_entry->tn.tag),
2128 tcs_entry->active_set);
2129 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2132 spin_unlock_bh(&tag_counter_set_list_lock);
2135 * If acct_tag is 0, then all entries belonging to uid are
2138 spin_lock_bh(&iface_stat_list_lock);
2139 list_for_each_entry(iface_entry, &iface_stat_list, list) {
2140 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2141 node = rb_first(&iface_entry->tag_stat_tree);
2143 ts_entry = rb_entry(node, struct tag_stat, tn.node);
2144 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2145 node = rb_next(node);
2147 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2148 "ts tag=0x%llx (uid=%u)\n",
2149 input, ts_entry->tn.tag, entry_uid);
2151 if (entry_uid != uid)
2153 if (!acct_tag || ts_entry->tn.tag == tag) {
2154 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2155 "erase ts: %s 0x%llx %u\n",
2156 input, iface_entry->ifname,
2157 get_atag_from_tag(ts_entry->tn.tag),
2159 rb_erase(&ts_entry->tn.node,
2160 &iface_entry->tag_stat_tree);
2164 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2166 spin_unlock_bh(&iface_stat_list_lock);
2168 /* Cleanup the uid_tag_data */
2169 spin_lock_bh(&uid_tag_data_tree_lock);
2170 node = rb_first(&uid_tag_data_tree);
2172 utd_entry = rb_entry(node, struct uid_tag_data, node);
2173 entry_uid = utd_entry->uid;
2174 node = rb_next(node);
2176 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2180 if (entry_uid != uid)
2183 * Go over the tag_refs, and those that don't have
2184 * sock_tags using them are freed.
2186 put_tag_ref_tree(tag, utd_entry);
2187 put_utd_entry(utd_entry);
2189 spin_unlock_bh(&uid_tag_data_tree_lock);
2191 atomic64_inc(&qtu_events.delete_cmds);
/*
 * Input format: "s <counter_set> <uid>". Sets the active counter set for
 * the uid's base tag, allocating the tag_counter_set on first use.
 * Requires can_manipulate_uids() privilege; counter_set is range-checked
 * against IFS_MAX_COUNTER_SETS.
 */
2198 static int ctrl_cmd_counter_set(const char *input)
2204 struct tag_counter_set *tcs;
2207 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2208 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2209 "set=%d uid=%u\n", input, argc, cmd,
2215 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2216 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2221 if (!can_manipulate_uids()) {
2222 pr_info("qtaguid: ctrl_counterset(%s): "
2223 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2224 input, current->pid, current->tgid, current_fsuid());
2229 tag = make_tag_from_uid(uid);
2230 spin_lock_bh(&tag_counter_set_list_lock);
2231 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
/* First set-change for this uid: allocate under the lock (GFP_ATOMIC). */
2233 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2235 spin_unlock_bh(&tag_counter_set_list_lock);
2236 pr_err("qtaguid: ctrl_counterset(%s): "
2237 "failed to alloc counter set\n",
2243 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2244 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2245 "(uid=%u) set=%d\n",
2246 input, tag, get_uid_from_tag(tag), counter_set);
2248 tcs->active_set = counter_set;
2249 spin_unlock_bh(&tag_counter_set_list_lock);
2250 atomic64_inc(&qtu_events.counter_set_changes);
/*
 * Input format: "t <sock_fd> [<acct_tag> [<uid>]]". Tags (or re-tags) the
 * socket referenced by sock_fd with {acct_tag, uid}. The sockfd_lookup()
 * file reference is kept for the lifetime of the tag (released on untag
 * or re-tag). uid defaults to the caller's fsuid; impersonation requires
 * privilege. NOTE(review): several lines (error `goto`s, #else/#endif of
 * branches, closing braces) are elided in this extraction.
 */
2257 static int ctrl_cmd_tag(const char *input)
2262 tag_t acct_tag = make_atag_from_value(0);
2264 struct socket *el_socket;
2266 struct sock_tag *sock_tag_entry;
2267 struct tag_ref *tag_ref_entry;
2268 struct uid_tag_data *uid_tag_data_entry;
2269 struct proc_qtu_data *pqd_entry;
2271 /* Unassigned args will get defaulted later. */
2272 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2273 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2274 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2280 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2282 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2283 " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2284 input, sock_fd, res, current->pid, current->tgid,
2288 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2289 input, atomic_long_read(&el_socket->file->f_count),
2292 acct_tag = make_atag_from_value(0);
2293 } else if (!valid_atag(acct_tag)) {
2294 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2298 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2299 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2300 "in_group=%d in_egroup=%d\n",
2301 input, current->pid, current->tgid, current_uid(),
2302 current_euid(), current_fsuid(),
2303 in_group_p(proc_ctrl_write_gid),
2304 in_egroup_p(proc_ctrl_write_gid));
2306 uid = current_fsuid();
2307 } else if (!can_impersonate_uid(uid)) {
2308 pr_info("qtaguid: ctrl_tag(%s): "
2309 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2310 input, current->pid, current->tgid, current_fsuid());
2314 full_tag = combine_atag_with_uid(acct_tag, uid);
2316 spin_lock_bh(&sock_tag_list_lock);
2317 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2318 tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2319 if (IS_ERR(tag_ref_entry)) {
2320 res = PTR_ERR(tag_ref_entry);
2321 spin_unlock_bh(&sock_tag_list_lock);
2324 tag_ref_entry->num_sock_tags++;
2325 if (sock_tag_entry) {
2326 struct tag_ref *prev_tag_ref_entry;
2328 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2329 "st@%p ...->f_count=%ld\n",
2330 input, el_socket->sk, sock_tag_entry,
2331 atomic_long_read(&el_socket->file->f_count));
2333 * This is a re-tagging, so release the sock_fd that was
2334 * locked at the time of the 1st tagging.
2335 * There is still the ref from this call's sockfd_lookup() so
2336 * it can be done within the spinlock.
2338 sockfd_put(sock_tag_entry->socket);
2339 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2340 &uid_tag_data_entry);
2341 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2342 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2343 prev_tag_ref_entry->num_sock_tags--;
2344 sock_tag_entry->tag = full_tag;
/* New tagging: allocate and populate a fresh sock_tag entry. */
2346 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2347 input, el_socket->sk);
2348 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2350 if (!sock_tag_entry) {
2351 pr_err("qtaguid: ctrl_tag(%s): "
2352 "socket tag alloc failed\n",
2354 spin_unlock_bh(&sock_tag_list_lock);
2356 goto err_tag_unref_put;
2358 sock_tag_entry->sk = el_socket->sk;
2359 sock_tag_entry->socket = el_socket;
2360 sock_tag_entry->pid = current->tgid;
2361 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2363 spin_lock_bh(&uid_tag_data_tree_lock);
2364 pqd_entry = proc_qtu_data_tree_search(
2365 &proc_qtu_data_tree, current->tgid);
2367 * TODO: remove if, and start failing.
2368 * At first, we want to catch user-space code that is not
2369 * opening the /dev/xt_qtaguid.
2371 if (IS_ERR_OR_NULL(pqd_entry))
2374 "User space forgot to open /dev/xt_qtaguid? "
2375 "pid=%u tgid=%u uid=%u\n", __func__,
2376 current->pid, current->tgid,
2379 list_add(&sock_tag_entry->list,
2380 &pqd_entry->sock_tag_list);
2381 spin_unlock_bh(&uid_tag_data_tree_lock);
2383 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2384 atomic64_inc(&qtu_events.sockets_tagged);
2386 spin_unlock_bh(&sock_tag_list_lock);
2387 /* We keep the ref to the socket (file) until it is untagged */
2388 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2389 input, sock_tag_entry,
2390 atomic_long_read(&el_socket->file->f_count));
/* Error unwinding: undo the tag_ref count taken above, then drop the fd. */
2394 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2395 tag_ref_entry->num_sock_tags--;
2396 free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2398 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2399 input, atomic_long_read(&el_socket->file->f_count) - 1);
2400 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2401 sockfd_put(el_socket);
2405 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
/*
 * Input format: "u <sock_fd>". Removes the sock_tag for the socket, drops
 * the file reference taken at tag time plus the one from this call's
 * sockfd_lookup(), and decrements the tag_ref's sock-tag count (the
 * tag_ref itself is only freed by ctrl_cmd_delete()).
 * NOTE(review): some error-path lines are elided in this extraction.
 */
2409 static int ctrl_cmd_untag(const char *input)
2413 struct socket *el_socket;
2415 struct sock_tag *sock_tag_entry;
2416 struct tag_ref *tag_ref_entry;
2417 struct uid_tag_data *utd_entry;
2418 struct proc_qtu_data *pqd_entry;
2420 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2421 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2422 input, argc, cmd, sock_fd);
2427 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2429 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2430 " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2431 input, sock_fd, res, current->pid, current->tgid,
2435 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2436 input, atomic_long_read(&el_socket->file->f_count),
2438 spin_lock_bh(&sock_tag_list_lock);
2439 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2440 if (!sock_tag_entry) {
2441 spin_unlock_bh(&sock_tag_list_lock);
2446 * The socket already belongs to the current process
2447 * so it can do whatever it wants to it.
2449 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2451 tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2452 BUG_ON(!tag_ref_entry);
2453 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2454 spin_lock_bh(&uid_tag_data_tree_lock);
2455 pqd_entry = proc_qtu_data_tree_search(
2456 &proc_qtu_data_tree, current->tgid);
2458 * TODO: remove if, and start failing.
2459 * At first, we want to catch user-space code that is not
2460 * opening the /dev/xt_qtaguid.
2462 if (IS_ERR_OR_NULL(pqd_entry))
2463 pr_warn_once("qtaguid: %s(): "
2464 "User space forgot to open /dev/xt_qtaguid? "
2465 "pid=%u tgid=%u uid=%u\n", __func__,
2466 current->pid, current->tgid, current_fsuid());
2468 list_del(&sock_tag_entry->list);
2469 spin_unlock_bh(&uid_tag_data_tree_lock);
2471 * We don't free tag_ref from the utd_entry here,
2472 * only during a cmd_delete().
2474 tag_ref_entry->num_sock_tags--;
2475 spin_unlock_bh(&sock_tag_list_lock);
2477 * Release the sock_fd that was grabbed at tag time,
2478 * and once more for the sockfd_lookup() here.
2480 sockfd_put(sock_tag_entry->socket);
2481 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2482 input, sock_tag_entry,
2483 atomic_long_read(&el_socket->file->f_count) - 1);
2484 sockfd_put(el_socket);
2486 kfree(sock_tag_entry);
2487 atomic64_inc(&qtu_events.sockets_untagged);
/* Error path: only the sockfd_lookup() ref needs to be released. */
2492 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2493 input, atomic_long_read(&el_socket->file->f_count) - 1);
2494 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2495 sockfd_put(el_socket);
2499 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
/*
 * Dispatch a ctrl-file command line by its first character:
 * 'd' delete, 's' counter-set, 't' tag, 'u' untag.
 * NOTE(review): the switch/case lines themselves are elided here; only the
 * per-command calls are visible.
 */
2503 static int qtaguid_ctrl_parse(const char *input, int count)
2508 CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2509 input, current->pid, current->tgid, current_fsuid());
2512 /* Collect params for commands */
2515 res = ctrl_cmd_delete(input);
2519 res = ctrl_cmd_counter_set(input);
2523 res = ctrl_cmd_tag(input);
2527 res = ctrl_cmd_untag(input);
2537 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2541 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
/*
 * write_proc handler for the "ctrl" proc entry.
 * Copies at most MAX_QTAGUID_CTRL_INPUT_LEN-1 bytes from user space into
 * a stack buffer, NUL-terminates it, and hands it to qtaguid_ctrl_parse().
 * When the module is passive, or on oversized/faulting input, it bails out
 * early (the return statements for those paths are not visible in this
 * excerpt — presumably -EINVAL/-EFAULT style errors; confirm in full source).
 */
2542 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2543 unsigned long count, void *data)
2545 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2547 if (unlikely(module_passive))
/* Reject input that would not leave room for the terminating NUL. */
2550 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2553 if (copy_from_user(input_buf, buffer, count))
2556 input_buf[count] = '\0';
2557 return qtaguid_ctrl_parse(input_buf, count);
/*
 * Cursor/state bundle passed through the stats proc reader helpers
 * (qtaguid_stats_proc_read() -> pp_sets() -> pp_stats_line()).
 * NOTE(review): additional members (outp, char_count, item_index,
 * items_to_skip) are referenced by the helpers but their declarations
 * are not visible in this excerpt.
 */
2560 struct proc_print_info {
/* Legacy procfs read_proc "start"/items-returned pointer from the caller. */
2562 char **num_items_returned;
/* Interface currently being walked in iface_stat_list. */
2563 struct iface_stat *iface_entry;
/* Tag-stat node currently being printed from the iface's rb-tree. */
2564 struct tag_stat *ts_entry;
/*
 * Emit one line of "stats" output into ppi->outp.
 * The very first call (item_index == 0) prints the column-header line;
 * subsequent calls print one data line for the current
 * (iface, tag, cnt_set) tuple.  Lines whose index is below
 * ppi->items_to_skip are counted but not printed, and stats owned by a
 * different uid are suppressed when can_read_other_uid_stats() denies
 * access.  Returns the formatted length (used by callers to detect
 * buffer exhaustion).
 * NOTE(review): the declaration of len, the early returns, and the data
 * snprintf() call header are not visible in this excerpt.
 */
2570 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2573 struct data_counters *cnts;
2575 if (!ppi->item_index) {
2576 if (ppi->item_index++ < ppi->items_to_skip)
2578 len = snprintf(ppi->outp, ppi->char_count,
2579 "idx iface acct_tag_hex uid_tag_int cnt_set "
2580 "rx_bytes rx_packets "
2581 "tx_bytes tx_packets "
2582 "rx_tcp_bytes rx_tcp_packets "
2583 "rx_udp_bytes rx_udp_packets "
2584 "rx_other_bytes rx_other_packets "
2585 "tx_tcp_bytes tx_tcp_packets "
2586 "tx_udp_bytes tx_udp_packets "
2587 "tx_other_bytes tx_other_packets\n");
2589 tag_t tag = ppi->ts_entry->tn.tag;
2590 uid_t stat_uid = get_uid_from_tag(tag);
/* Per-uid privacy: only privileged readers may see other uids' stats. */
2592 if (!can_read_other_uid_stats(stat_uid)) {
2593 CT_DEBUG("qtaguid: stats line: "
2594 "%s 0x%llx %u: insufficient priv "
2595 "from pid=%u tgid=%u uid=%u\n",
2596 ppi->iface_entry->ifname,
2597 get_atag_from_tag(tag), stat_uid,
2598 current->pid, current->tgid, current_fsuid());
/* Pagination: count the item but skip printing until items_to_skip. */
2601 if (ppi->item_index++ < ppi->items_to_skip)
2603 cnts = &ppi->ts_entry->counters;
2605 ppi->outp, ppi->char_count,
2606 "%d %s 0x%llx %u %u "
2616 ppi->iface_entry->ifname,
2617 get_atag_from_tag(tag),
/* Totals first (all protocols summed), then the per-proto breakdown. */
2620 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2621 dc_sum_packets(cnts, cnt_set, IFS_RX),
2622 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2623 dc_sum_packets(cnts, cnt_set, IFS_TX),
2624 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2625 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2626 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2627 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2628 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2629 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2630 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2631 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2632 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2633 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2634 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2635 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
/*
 * Print one stats line per counter set for the current tag-stat entry,
 * advancing ppi->outp/char_count as it goes.  When a line would not fit
 * (len >= char_count) the walk stops; the false-return for that path is
 * not visible in this excerpt but is implied by the caller's
 * "if (!pp_sets(&ppi))" check.
 */
2640 static bool pp_sets(struct proc_print_info *ppi)
2644 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2646 len = pp_stats_line(ppi, counter_set);
2647 if (len >= ppi->char_count) {
/* Line fit: consume buffer space and count it as returned. */
2653 ppi->char_count -= len;
2654 (*ppi->num_items_returned)++;
2661 * Procfs reader to get all tag stats using style "1)" as described in
2663 * Groups all protocols tx/rx bytes.
/*
 * Legacy procfs read_proc handler for the "stats" entry.
 * Prints the header line, then walks every iface_stat and each iface's
 * tag_stat rb-tree (both under their _bh spinlocks), emitting one line
 * per counter set via pp_sets().  Returns the number of bytes written
 * (ppi.outp - page).  In passive mode only the header is produced.
 */
2665 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2666 off_t items_to_skip, int char_count, int *eof,
2669 struct proc_print_info ppi;
2674 ppi.char_count = char_count;
2675 ppi.num_items_returned = num_items_returned;
2676 ppi.items_to_skip = items_to_skip;
2678 if (unlikely(module_passive)) {
2679 len = pp_stats_line(&ppi, 0);
2680 /* The header should always be shorter than the buffer. */
2681 BUG_ON(len >= ppi.char_count);
2682 (*num_items_returned)++;
2687 CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
2688 "page=%p *num_items_returned=%p off=%ld "
2689 "char_count=%d *eof=%d\n",
2690 current->pid, current->tgid, current_fsuid(),
2691 page, *num_items_returned,
2692 items_to_skip, char_count, *eof);
2697 /* The idx is there to help debug when things go belly up. */
2698 len = pp_stats_line(&ppi, 0);
2699 /* Don't advance the outp unless the whole line was printed */
2700 if (len >= ppi.char_count) {
2702 return ppi.outp - page;
2706 ppi.char_count -= len;
2707 (*num_items_returned)++;
/* Walk all interfaces; per-iface tag trees are locked individually. */
2710 spin_lock_bh(&iface_stat_list_lock);
2711 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2712 struct rb_node *node;
2713 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2714 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2716 node = rb_next(node)) {
2717 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
/* Buffer full: unwind both locks and return what fit. */
2718 if (!pp_sets(&ppi)) {
2720 &ppi.iface_entry->tag_stat_list_lock);
2721 spin_unlock_bh(&iface_stat_list_lock);
2722 return ppi.outp - page;
2725 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2727 spin_unlock_bh(&iface_stat_list_lock);
2730 return ppi.outp - page;
2733 /*------------------------------------------*/
/*
 * open() handler for /dev/xt_qtaguid.
 * Creates a per-process (tgid-keyed) proc_qtu_data entry, linked to the
 * per-uid uid_tag_data, and stashes it in file->private_data so
 * qtudev_release() can clean up the process's socket tags.  A second
 * open by the same tgid is rejected.  Everything runs under
 * uid_tag_data_tree_lock, hence the GFP_ATOMIC allocation.
 */
2734 static int qtudev_open(struct inode *inode, struct file *file)
2736 struct uid_tag_data *utd_entry;
2737 struct proc_qtu_data *pqd_entry;
2738 struct proc_qtu_data *new_pqd_entry;
2740 bool utd_entry_found;
2742 if (unlikely(qtu_proc_handling_passive))
2745 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2746 current->pid, current->tgid, current_fsuid());
2748 spin_lock_bh(&uid_tag_data_tree_lock);
2750 /* Look for existing uid data, or alloc one. */
2751 utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2752 if (IS_ERR_OR_NULL(utd_entry)) {
2753 res = PTR_ERR(utd_entry);
2757 /* Look for existing PID based proc_data */
2758 pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
/* Found one: this tgid already has the device open — refuse. */
2761 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2762 "%s already opened\n",
2763 current->pid, current->tgid, current_fsuid(),
2766 goto err_unlock_free_utd;
/* GFP_ATOMIC: we still hold uid_tag_data_tree_lock (bh-disabled). */
2769 new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2770 if (!new_pqd_entry) {
2771 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2772 "proc data alloc failed\n",
2773 current->pid, current->tgid, current_fsuid());
2775 goto err_unlock_free_utd;
2777 new_pqd_entry->pid = current->tgid;
2778 INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2779 new_pqd_entry->parent_tag_data = utd_entry;
2780 utd_entry->num_pqd++;
2782 proc_qtu_data_tree_insert(new_pqd_entry,
2783 &proc_qtu_data_tree);
2785 spin_unlock_bh(&uid_tag_data_tree_lock);
2786 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2787 current_fsuid(), new_pqd_entry);
2788 file->private_data = new_pqd_entry;
2791 err_unlock_free_utd:
/* Only undo the utd_entry if get_uid_data() allocated it just now. */
2792 if (!utd_entry_found) {
2793 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2796 spin_unlock_bh(&uid_tag_data_tree_lock);
/*
 * release() handler for /dev/xt_qtaguid.
 * Tears down every sock_tag the owning process created: drops the tag
 * refs, unlinks the entries from the global sock_tag_tree, and finally
 * frees the proc_qtu_data itself.  sockfd_put()/free happens after the
 * spinlocks are dropped, via the st_to_free_tree staging tree.
 */
2801 static int qtudev_release(struct inode *inode, struct file *file)
2803 struct proc_qtu_data *pqd_entry = file->private_data;
2804 struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
2805 struct sock_tag *st_entry;
2806 struct rb_root st_to_free_tree = RB_ROOT;
2807 struct list_head *entry, *next;
2810 if (unlikely(qtu_proc_handling_passive))
2814 * Do not trust the current->pid, it might just be a kworker cleaning
2815 * up after a dead proc.
2817 DR_DEBUG("qtaguid: qtudev_release(): "
2818 "pid=%u tgid=%u uid=%u "
2819 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2820 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2821 pqd_entry, pqd_entry->pid, utd_entry,
2822 utd_entry->num_active_tags);
/* Lock order: sock_tag_list_lock before uid_tag_data_tree_lock. */
2824 spin_lock_bh(&sock_tag_list_lock);
2825 spin_lock_bh(&uid_tag_data_tree_lock);
2827 list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2828 st_entry = list_entry(entry, struct sock_tag, list);
2829 DR_DEBUG("qtaguid: %s(): "
2830 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2832 st_entry, st_entry->sk,
2833 current->pid, current->tgid,
2834 pqd_entry->parent_tag_data->uid);
/*
 * The tag's uid may differ from the opener's uid; look up the
 * uid_tag_data owning this particular tag.
 */
2836 utd_entry = uid_tag_data_tree_search(
2838 get_uid_from_tag(st_entry->tag));
2839 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2840 DR_DEBUG("qtaguid: %s(): "
2841 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2842 st_entry->tag, utd_entry);
2843 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2846 BUG_ON(tr->num_sock_tags <= 0);
2847 tr->num_sock_tags--;
2848 free_tag_ref_from_utd_entry(tr, utd_entry);
2850 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2851 list_del(&st_entry->list);
2852 /* Can't sockfd_put() within spinlock, do it later. */
2853 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2856 * Try to free the utd_entry if no other proc_qtu_data is
2857 * using it (num_pqd is 0) and it doesn't have active tags
2858 * (num_active_tags is 0).
2860 put_utd_entry(utd_entry);
2863 rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2864 BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2865 pqd_entry->parent_tag_data->num_pqd--;
2866 put_utd_entry(pqd_entry->parent_tag_data);
2868 file->private_data = NULL;
2870 spin_unlock_bh(&uid_tag_data_tree_lock);
2871 spin_unlock_bh(&sock_tag_list_lock);
/* Now safe (no locks held) to release sockets and free entries. */
2874 sock_tag_tree_erase(&st_to_free_tree);
2876 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2877 current->pid, current->tgid);
2881 /*------------------------------------------*/
/* File operations for the /dev/xt_qtaguid misc device (open/release only). */
2882 static const struct file_operations qtudev_fops = {
2883 .owner = THIS_MODULE,
2884 .open = qtudev_open,
2885 .release = qtudev_release,
/* Misc char device registered at init so user space can open /dev/xt_qtaguid. */
2888 static struct miscdevice qtu_device = {
2889 .minor = MISC_DYNAMIC_MINOR,
2890 .name = QTU_DEV_NAME,
2891 .fops = &qtudev_fops,
2892 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2895 /*------------------------------------------*/
/*
 * Create the procfs tree: /proc/net/xt_qtaguid/ with "ctrl" and "stats"
 * entries, using the legacy create_proc_entry()/read_proc API.
 * On partial failure it unwinds via remove_proc_entry() (the error-label
 * lines themselves are not fully visible in this excerpt).
 */
2896 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2899 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2900 if (!*res_procdir) {
2901 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
/* "ctrl": command interface; perms come from the ctrl_perms module param. */
2906 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2908 if (!xt_qtaguid_ctrl_file) {
2909 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2914 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2915 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
/* "stats": read-only counters; perms from the stats_perms module param. */
2917 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2919 if (!xt_qtaguid_stats_file) {
2920 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2923 goto no_stats_entry;
2925 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2927 * TODO: add support counter hacking
2928 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
/* Error unwind: remove entries in reverse creation order. */
2933 remove_proc_entry("ctrl", *res_procdir);
2935 remove_proc_entry("xt_qtaguid", NULL);
/*
 * xtables match registration.  NFPROTO_UNSPEC lets it match for both
 * IPv4 and IPv6 tables.  The .name/.revision initializers are not
 * visible in this excerpt.
 */
2940 static struct xt_match qtaguid_mt_reg __read_mostly = {
2942 * This module masquerades as the "owner" module so that iptables
2943 * tools can deal with it.
2947 .family = NFPROTO_UNSPEC,
2948 .match = qtaguid_mt,
2949 .matchsize = sizeof(struct xt_qtaguid_match_info),
/*
 * Module init: create the proc entries, set up interface stat tracking,
 * register the xtables match, and register the misc device.  Fails if
 * any step fails (the error return value is not visible in this
 * excerpt).  Note: no rollback of earlier steps is visible here.
 */
2953 static int __init qtaguid_mt_init(void)
2955 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2956 || iface_stat_init(xt_qtaguid_procdir)
2957 || xt_register_match(&qtaguid_mt_reg)
2958 || misc_register(&qtu_device))
2964 * TODO: allow unloading of the module.
2965 * For now stats are permanent.
2966 * Kconfig forces'y/n' and never an 'm'.
/* No module_exit: unloading is intentionally unsupported (see TODO above). */
2969 module_init(qtaguid_mt_init);
2970 MODULE_AUTHOR("jpa <jpa@google.com>");
2971 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2972 MODULE_LICENSE("GPL");
/* Aliases so iptables "-m owner" / "-m qtaguid" auto-load this module. */
2973 MODULE_ALIAS("ipt_owner");
2974 MODULE_ALIAS("ip6t_owner");
2975 MODULE_ALIAS("ipt_qtaguid");
2976 MODULE_ALIAS("ip6t_qtaguid");