2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
29 #include <linux/netfilter/xt_socket.h>
30 #include "xt_qtaguid_internal.h"
31 #include "xt_qtaguid_print.h"
34 * We only use the xt_socket funcs within a similar context to avoid unexpected
37 #define XT_SOCKET_SUPPORTED_HOOKS \
38 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
41 static const char *module_procdirname = "xt_qtaguid";
42 static struct proc_dir_entry *xt_qtaguid_procdir;
44 static unsigned int proc_iface_perms = S_IRUGO;
45 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
47 static struct proc_dir_entry *xt_qtaguid_stats_file;
48 static unsigned int proc_stats_perms = S_IRUGO;
49 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
51 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
52 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
53 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
55 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
57 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
59 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
60 #include <linux/android_aid.h>
61 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
62 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
64 /* 0 means, don't limit anybody */
65 static gid_t proc_stats_readall_gid;
66 static gid_t proc_ctrl_write_gid;
68 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
70 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
74 * Limit the number of active tags (via socket tags) for a given UID.
75 * Multiple processes could share the UID.
77 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
78 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
81 * After the kernel has initialized this module, it is still possible
83 * Setting passive to Y:
84 * - the iface stats handling will not act on notifications.
85 * - iptables matches will never match.
86 * - ctrl commands silently succeed.
87 * - stats are always empty.
88 * This is mostly useful when a bug is suspected.
90 static bool module_passive;
91 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
94 * Control how qtaguid data is tracked per proc/uid.
95 * Setting tag_tracking_passive to Y:
96 * - don't create proc specific structs to track tags
97 * - don't check that active tag stats exceed some limits.
98 * - don't clean up socket tags on process exits.
99 * This is mostly useful when a bug is suspected.
101 static bool qtu_proc_handling_passive;
102 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
105 #define QTU_DEV_NAME "xt_qtaguid"
107 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
108 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
110 /*---------------------------------------------------------------------------*/
111 static const char *iface_stat_procdirname = "iface_stat";
112 static struct proc_dir_entry *iface_stat_procdir;
113 static const char *iface_stat_all_procfilename = "iface_stat_all";
114 static struct proc_dir_entry *iface_stat_all_procfile;
119 * iface_stat_list_lock
122 * uid_tag_data_tree_lock
123 * tag_counter_set_list_lock
124 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
127 * Call tree with all lock holders as of 2011-09-25:
129 * iface_stat_all_proc_read()
130 * iface_stat_list_lock
131 * (struct iface_stat)
133 * qtaguid_ctrl_proc_read()
136 * (struct proc_qtu_data->sock_tag_list)
137 * prdebug_full_state()
140 * uid_tag_data_tree_lock
141 * (uid_tag_data_tree)
142 * (proc_qtu_data_tree)
143 * iface_stat_list_lock
145 * qtaguid_stats_proc_read()
146 * iface_stat_list_lock
147 * struct iface_stat->tag_stat_list_lock
150 * uid_tag_data_tree_lock
153 * sock_tag_data_list_lock
154 * uid_tag_data_tree_lock
155 * prdebug_full_state()
157 * uid_tag_data_tree_lock
158 * iface_stat_list_lock
160 * iface_netdev_event_handler()
161 * iface_stat_create()
162 * iface_stat_list_lock
163 * iface_stat_update()
164 * iface_stat_list_lock
166 * iface_inetaddr_event_handler()
167 * iface_stat_create()
168 * iface_stat_list_lock
169 * iface_stat_update()
170 * iface_stat_list_lock
172 * iface_inet6addr_event_handler()
173 * iface_stat_create_ipv6()
174 * iface_stat_list_lock
175 * iface_stat_update()
176 * iface_stat_list_lock
180 * if_tag_stat_update()
183 * struct iface_stat->tag_stat_list_lock
185 * get_active_counter_set()
186 * tag_counter_set_list_lock
188 * get_active_counter_set()
189 * tag_counter_set_list_lock
192 * qtaguid_ctrl_parse()
195 * tag_counter_set_list_lock
196 * iface_stat_list_lock
197 * struct iface_stat->tag_stat_list_lock
198 * uid_tag_data_tree_lock
199 * ctrl_cmd_counter_set()
200 * tag_counter_set_list_lock
205 * uid_tag_data_tree_lock
206 * (uid_tag_data_tree)
207 * uid_tag_data_tree_lock
208 * (proc_qtu_data_tree)
211 * uid_tag_data_tree_lock
214 static LIST_HEAD(iface_stat_list);
215 static DEFINE_SPINLOCK(iface_stat_list_lock);
217 static struct rb_root sock_tag_tree = RB_ROOT;
218 static DEFINE_SPINLOCK(sock_tag_list_lock);
220 static struct rb_root tag_counter_set_tree = RB_ROOT;
221 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
223 static struct rb_root uid_tag_data_tree = RB_ROOT;
224 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
226 static struct rb_root proc_qtu_data_tree = RB_ROOT;
227 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
229 static struct qtaguid_event_counts qtu_events;
230 /*----------------------------------------------*/
/*
 * True when the caller may (un)tag sockets on behalf of other UIDs:
 * root fsuid, an unset (0 == unrestricted) proc_ctrl_write_gid, or
 * effective membership in proc_ctrl_write_gid.
 */
231 static bool can_manipulate_uids(void)
234 	return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
235 	    || in_egroup_p(proc_ctrl_write_gid);
/*
 * True when the caller may act as @uid: either it is that uid already,
 * or it holds the broader can_manipulate_uids() privilege.
 */
238 static bool can_impersonate_uid(uid_t uid)
240 	return uid == current_fsuid() || can_manipulate_uids();
/*
 * True when the caller may read stats belonging to @uid: root fsuid,
 * own uid, an unset (0 == everybody) proc_stats_readall_gid, or
 * effective membership in proc_stats_readall_gid.
 */
243 static bool can_read_other_uid_stats(uid_t uid)
246 	return unlikely(!current_fsuid()) || uid == current_fsuid()
247 	    || unlikely(!proc_stats_readall_gid)
248 	    || in_egroup_p(proc_stats_readall_gid);
/*
 * Accumulate @bytes/@packets into the counter bucket selected by
 * (set, direction, protocol).  Caller provides any needed locking.
 */
251 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
252 				       enum ifs_tx_rx direction,
253 				       enum ifs_proto ifs_proto,
257 	counters->bpc[set][direction][ifs_proto].bytes += bytes;
258 	counters->bpc[set][direction][ifs_proto].packets += packets;
/* Sum of byte counts across TCP, UDP and "other" protocols for one set/dir. */
261 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
263 				    enum ifs_tx_rx direction)
265 	return counters->bpc[set][direction][IFS_TCP].bytes
266 		+ counters->bpc[set][direction][IFS_UDP].bytes
267 		+ counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
/* Sum of packet counts across TCP, UDP and "other" protocols for one set/dir. */
270 static inline uint64_t dc_sum_packets(struct data_counters *counters,
272 				      enum ifs_tx_rx direction)
274 	return counters->bpc[set][direction][IFS_TCP].packets
275 		+ counters->bpc[set][direction][IFS_UDP].packets
276 		+ counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
/*
 * Standard rbtree lookup keyed by tag via tag_compare().
 * Returns the matching tag_node, or NULL when not found.
 * Caller holds the lock protecting @root.
 */
279 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
281 	struct rb_node *node = root->rb_node;
284 		struct tag_node *data = rb_entry(node, struct tag_node, node);
286 		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
287 			 " node=%p data=%p\n", tag, node, data);
288 		result = tag_compare(tag, data->tag);
289 		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
290 			 " data.tag=0x%llx (uid=%u) res=%d\n",
291 			 tag, data->tag, get_uid_from_tag(data->tag), result);
293 			node = node->rb_left;
295 			node = node->rb_right;
/*
 * Standard rbtree insert keyed by tag_compare().  Walks down to the
 * correct leaf slot, links the node and rebalances.
 * Caller holds the lock protecting @root.
 */
302 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
304 	struct rb_node **new = &(root->rb_node), *parent = NULL;
306 	/* Figure out where to put new node */
308 		struct tag_node *this = rb_entry(*new, struct tag_node,
310 		int result = tag_compare(data->tag, this->tag);
311 		RB_DEBUG("qtaguid: %s(): tag=0x%llx"
312 			 " (uid=%u)\n", __func__,
314 			 get_uid_from_tag(this->tag));
317 			new = &((*new)->rb_left);
319 			new = &((*new)->rb_right);
324 	/* Add new node and rebalance tree. */
325 	rb_link_node(&data->node, parent, new);
326 	rb_insert_color(&data->node, root);
/* Insert a tag_stat into @root via its embedded tag_node. */
329 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
331 	tag_node_tree_insert(&data->tn, root);
/* Look up a tag_stat by tag; converts the found tag_node back to tag_stat. */
334 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
336 	struct tag_node *node = tag_node_tree_search(root, tag);
339 	return rb_entry(&node->node, struct tag_stat, tn.node);
/* Insert a tag_counter_set into @root via its embedded tag_node. */
342 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
343 					struct rb_root *root)
345 	tag_node_tree_insert(&data->tn, root);
/* Look up a tag_counter_set by tag (container of the found tag_node). */
348 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
351 	struct tag_node *node = tag_node_tree_search(root, tag);
354 	return rb_entry(&node->node, struct tag_counter_set, tn.node);
/* Insert a tag_ref into @root via its embedded tag_node. */
358 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
360 	tag_node_tree_insert(&data->tn, root);
/* Look up a tag_ref by tag (container of the found tag_node). */
363 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
365 	struct tag_node *node = tag_node_tree_search(root, tag);
368 	return rb_entry(&node->node, struct tag_ref, tn.node);
/*
 * rbtree lookup keyed by raw socket pointer value.
 * Caller must hold sock_tag_list_lock.
 */
371 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
372 					     const struct sock *sk)
374 	struct rb_node *node = root->rb_node;
377 		struct sock_tag *data = rb_entry(node, struct sock_tag,
380 			node = node->rb_left;
381 		else if (sk > data->sk)
382 			node = node->rb_right;
/*
 * rbtree insert keyed by socket pointer value.
 * Caller must hold the lock protecting @root.
 */
389 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
391 	struct rb_node **new = &(root->rb_node), *parent = NULL;
393 	/* Figure out where to put new node */
395 		struct sock_tag *this = rb_entry(*new, struct sock_tag,
398 		if (data->sk < this->sk)
399 			new = &((*new)->rb_left);
400 		else if (data->sk > this->sk)
401 			new = &((*new)->rb_right);
406 	/* Add new node and rebalance tree. */
407 	rb_link_node(&data->sock_node, parent, new);
408 	rb_insert_color(&data->sock_node, root);
/*
 * Drain @st_to_free_tree: erase every sock_tag and drop the socket
 * reference taken when it was tagged (sockfd_put).
 * NOTE(review): freeing of st_entry itself is on a line not visible
 * here — presumably a kfree() follows the sockfd_put().
 */
411 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
413 	struct rb_node *node;
414 	struct sock_tag *st_entry;
416 	node = rb_first(st_to_free_tree);
418 		st_entry = rb_entry(node, struct sock_tag, sock_node);
419 		node = rb_next(node);
420 		CT_DEBUG("qtaguid: %s(): "
421 			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
424 			 get_uid_from_tag(st_entry->tag));
425 		rb_erase(&st_entry->sock_node, st_to_free_tree);
426 		sockfd_put(st_entry->socket);
/*
 * rbtree lookup keyed by pid.  Caller must hold uid_tag_data_tree_lock
 * (proc_qtu_data_tree has no lock of its own; see comment at the tree).
 */
431 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
434 	struct rb_node *node = root->rb_node;
437 		struct proc_qtu_data *data = rb_entry(node,
438 						      struct proc_qtu_data,
441 			node = node->rb_left;
442 		else if (pid > data->pid)
443 			node = node->rb_right;
/*
 * rbtree insert keyed by pid.  Caller must hold uid_tag_data_tree_lock.
 */
450 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
451 				      struct rb_root *root)
453 	struct rb_node **new = &(root->rb_node), *parent = NULL;
455 	/* Figure out where to put new node */
457 		struct proc_qtu_data *this = rb_entry(*new,
458 						      struct proc_qtu_data,
461 		if (data->pid < this->pid)
462 			new = &((*new)->rb_left);
463 		else if (data->pid > this->pid)
464 			new = &((*new)->rb_right);
469 	/* Add new node and rebalance tree. */
470 	rb_link_node(&data->node, parent, new);
471 	rb_insert_color(&data->node, root);
/*
 * rbtree insert keyed by uid.  Caller must hold uid_tag_data_tree_lock.
 */
474 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
475 				     struct rb_root *root)
477 	struct rb_node **new = &(root->rb_node), *parent = NULL;
479 	/* Figure out where to put new node */
481 		struct uid_tag_data *this = rb_entry(*new,
485 		if (data->uid < this->uid)
486 			new = &((*new)->rb_left);
487 		else if (data->uid > this->uid)
488 			new = &((*new)->rb_right);
493 	/* Add new node and rebalance tree. */
494 	rb_link_node(&data->node, parent, new);
495 	rb_insert_color(&data->node, root);
/*
 * rbtree lookup keyed by uid.  Caller must hold uid_tag_data_tree_lock.
 */
498 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
501 	struct rb_node *node = root->rb_node;
504 		struct uid_tag_data *data = rb_entry(node,
508 			node = node->rb_left;
509 		else if (uid > data->uid)
510 			node = node->rb_right;
518 * Allocates a new uid_tag_data struct if needed.
519 * Returns a pointer to the found or allocated uid_tag_data.
520 * Returns a PTR_ERR on failures, and lock is not held.
521 * If found is not NULL:
522 * sets *found to true if not allocated.
523 * sets *found to false if allocated.
/*
 * Find-or-create the per-UID tag data (caller holds
 * uid_tag_data_tree_lock; allocation is GFP_ATOMIC for that reason).
 * Note *found_res is a bool assigned from a pointer — relies on C's
 * implicit pointer-to-bool conversion (true iff the entry pre-existed).
 */
525 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
527 	struct uid_tag_data *utd_entry;
529 	/* Look for top level uid_tag_data for the UID */
530 	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
531 	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
534 		*found_res = utd_entry;
538 	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
540 		pr_err("qtaguid: get_uid_data(%u): "
541 		       "tag data alloc failed\n", uid);
542 		return ERR_PTR(-ENOMEM);
545 	utd_entry->uid = uid;
546 	utd_entry->tag_ref_tree = RB_ROOT;
547 	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
548 	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
552 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
/*
 * Allocate and register a tag_ref under @utd_entry, enforcing the
 * per-UID max_sock_tags quota.  Returns ERR_PTR on quota/alloc failure
 * (error return lines not visible in this extract), never NULL.
 * Caller holds uid_tag_data_tree_lock (hence GFP_ATOMIC).
 */
553 static struct tag_ref *new_tag_ref(tag_t new_tag,
554 				   struct uid_tag_data *utd_entry)
556 	struct tag_ref *tr_entry;
559 	if (utd_entry->num_active_tags + 1 > max_sock_tags) {
560 		pr_info("qtaguid: new_tag_ref(0x%llx): "
561 			"tag ref alloc quota exceeded. max=%d\n",
562 			new_tag, max_sock_tags);
568 	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
570 		pr_err("qtaguid: new_tag_ref(0x%llx): "
571 		       "tag ref alloc failed\n",
576 	tr_entry->tn.tag = new_tag;
577 	/* tr_entry->num_sock_tags handled by caller */
578 	utd_entry->num_active_tags++;
579 	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
580 	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
581 		 " inserted new tag ref %p\n",
/*
 * Resolve @full_tag to its tag_ref (may be NULL) and its owning
 * uid_tag_data via *utd_res.  Propagates get_uid_data() errors through
 * *utd_res.  Caller holds uid_tag_data_tree_lock.
 */
589 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
590 				      struct uid_tag_data **utd_res)
592 	struct uid_tag_data *utd_entry;
593 	struct tag_ref *tr_entry;
595 	uid_t uid = get_uid_from_tag(full_tag);
597 	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
600 	utd_entry = get_uid_data(uid, &found_utd);
601 	if (IS_ERR_OR_NULL(utd_entry)) {
603 			*utd_res = utd_entry;
607 	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
609 		*utd_res = utd_entry;
610 	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
611 		 full_tag, utd_entry, tr_entry);
615 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
/*
 * Locked find-or-create wrapper around lookup_tag_ref()/new_tag_ref().
 * BUG()s if the uid entry could not be resolved; never returns NULL
 * (ERR_PTR or valid pointer).  Takes uid_tag_data_tree_lock itself.
 */
616 static struct tag_ref *get_tag_ref(tag_t full_tag,
617 				   struct uid_tag_data **utd_res)
619 	struct uid_tag_data *utd_entry;
620 	struct tag_ref *tr_entry;
622 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
624 	spin_lock_bh(&uid_tag_data_tree_lock);
625 	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
626 	BUG_ON(IS_ERR_OR_NULL(utd_entry));
628 		tr_entry = new_tag_ref(full_tag, utd_entry);
630 	spin_unlock_bh(&uid_tag_data_tree_lock);
632 	*utd_res = utd_entry;
633 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
634 		 full_tag, utd_entry, tr_entry);
638 /* Checks and maybe frees the UID Tag Data entry */
/*
 * Release a uid_tag_data entry: erase it from uid_tag_data_tree once
 * it has no tag refs and no proc_qtu_data users; otherwise just log.
 * (The kfree() after rb_erase is on a line not visible in this
 * extract.)  Caller holds uid_tag_data_tree_lock.
 */
639 static void put_utd_entry(struct uid_tag_data *utd_entry)
641 	/* Are we done with the UID tag data entry? */
642 	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
643 		!utd_entry->num_pqd) {
644 		DR_DEBUG("qtaguid: %s(): "
645 			 "erase utd_entry=%p uid=%u "
646 			 "by pid=%u tgid=%u uid=%u\n", __func__,
647 			 utd_entry, utd_entry->uid,
648 			 current->pid, current->tgid, current_fsuid());
649 		BUG_ON(utd_entry->num_active_tags);
650 		rb_erase(&utd_entry->node, &uid_tag_data_tree);
653 		DR_DEBUG("qtaguid: %s(): "
654 			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
655 			 __func__, utd_entry, utd_entry->num_active_tags,
657 		BUG_ON(!(utd_entry->num_active_tags ||
658 			 utd_entry->num_pqd));
663 * If no sock_tags are using this tag_ref,
664 * decrements refcount of utd_entry, removes tr_entry
665 * from utd_entry->tag_ref_tree and frees.
/*
 * If @tr_entry has no remaining sock_tags, unlink it from
 * utd_entry->tag_ref_tree and decrement the active-tag count.
 * (The kfree() of tr_entry is on a line not visible in this extract.)
 */
667 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
668 					struct uid_tag_data *utd_entry)
670 	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
671 		 tr_entry, tr_entry->tn.tag,
672 		 get_uid_from_tag(tr_entry->tn.tag));
673 	if (!tr_entry->num_sock_tags) {
674 		BUG_ON(!utd_entry->num_active_tags);
675 		utd_entry->num_active_tags--;
676 		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
677 		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
/*
 * Walk utd_entry's tag_ref_tree and release refs: when @full_tag has a
 * zero acct_tag, every ref is a candidate; otherwise only the exact
 * matching tag.  Uses rb_next() before freeing so iteration stays safe.
 */
682 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
684 	struct rb_node *node;
685 	struct tag_ref *tr_entry;
688 	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
689 		 full_tag, get_uid_from_tag(full_tag));
690 	acct_tag = get_atag_from_tag(full_tag);
691 	node = rb_first(&utd_entry->tag_ref_tree);
693 		tr_entry = rb_entry(node, struct tag_ref, tn.node);
694 		node = rb_next(node);
695 		if (!acct_tag || tr_entry->tn.tag == full_tag)
696 			free_tag_ref_from_utd_entry(tr_entry, utd_entry);
/*
 * Legacy procfs read callback: print one u64 (*data) as decimal.
 * Sets *eof when the whole value fits in the caller's buffer.
 */
700 static int read_proc_u64(char *page, char **start, off_t off,
701 			 int count, int *eof, void *data)
706 	uint64_t *iface_entry = data;
711 	value = *iface_entry;
712 	p += sprintf(p, "%llu\n", value);
713 	len = (p - page) - off;
714 	*eof = (len <= count) ? 1 : 0;
/*
 * Legacy procfs read callback: print one bool (*data) as 0/1.
 * Mirrors read_proc_u64() above.
 */
719 static int read_proc_bool(char *page, char **start, off_t off,
720 			  int count, int *eof, void *data)
725 	bool *bool_entry = data;
731 	p += sprintf(p, "%u\n", value);
732 	len = (p - page) - off;
733 	*eof = (len <= count) ? 1 : 0;
/*
 * Return the active counter set for the UID part of @tag (acct part is
 * stripped via get_utag_from_tag — only UID tags carry active sets).
 * Takes tag_counter_set_list_lock around the tree lookup.
 */
738 static int get_active_counter_set(tag_t tag)
741 	struct tag_counter_set *tcs;
743 	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
745 		 tag, get_uid_from_tag(tag));
746 	/* For now we only handle UID tags for active sets */
747 	tag = get_utag_from_tag(tag);
748 	spin_lock_bh(&tag_counter_set_list_lock);
749 	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
751 		active_set = tcs->active_set;
752 	spin_unlock_bh(&tag_counter_set_list_lock);
757 * Find the entry for tracking the specified interface.
758 * Caller must hold iface_stat_list_lock
/*
 * Linear search of iface_stat_list for @ifname; NULL-name is logged
 * and rejected.  Caller must hold iface_stat_list_lock.
 */
760 static struct iface_stat *get_iface_entry(const char *ifname)
762 	struct iface_stat *iface_entry;
764 	/* Find the entry for tracking the specified tag within the interface */
765 	if (ifname == NULL) {
766 		pr_info("qtaguid: iface_stat: get() NULL device name\n");
770 	/* Iterate over interfaces */
771 	list_for_each_entry(iface_entry, &iface_stat_list, list) {
772 		if (!strcmp(ifname, iface_entry->ifname))
/*
 * /proc/net/xt_qtaguid/iface_stat_all read handler: one line per
 * tracked interface with module totals plus live dev_get_stats()
 * numbers (zeros when the iface is inactive / has no net_dev).
 * Skips @items_to_skip entries to support chunked reads; bails out
 * early when the output would overflow @char_count.
 * When module_passive is set the list is not reported (early-return
 * lines not visible in this extract).
 */
780 static int iface_stat_all_proc_read(char *page, char **num_items_returned,
781 				    off_t items_to_skip, int char_count,
782 				    int *eof, void *data)
787 	struct iface_stat *iface_entry;
788 	struct rtnl_link_stats64 dev_stats, *stats;
789 	struct rtnl_link_stats64 no_dev_stats = {0};
791 	if (unlikely(module_passive)) {
796 	CT_DEBUG("qtaguid:proc iface_stat_all "
797 		 "page=%p *num_items_returned=%p off=%ld "
798 		 "char_count=%d *eof=%d\n", page, *num_items_returned,
799 		 items_to_skip, char_count, *eof);
805 	 * This lock will prevent iface_stat_update() from changing active,
806 	 * and in turn prevent an interface from unregistering itself.
808 	spin_lock_bh(&iface_stat_list_lock);
809 	list_for_each_entry(iface_entry, &iface_stat_list, list) {
810 		if (item_index++ < items_to_skip)
813 		if (iface_entry->active) {
814 			stats = dev_get_stats(iface_entry->net_dev,
817 			stats = &no_dev_stats;
819 		len = snprintf(outp, char_count,
821 			       "%llu %llu %llu %llu "
822 			       "%llu %llu %llu %llu\n",
825 			       iface_entry->totals[IFS_RX].bytes,
826 			       iface_entry->totals[IFS_RX].packets,
827 			       iface_entry->totals[IFS_TX].bytes,
828 			       iface_entry->totals[IFS_TX].packets,
829 			       stats->rx_bytes, stats->rx_packets,
830 			       stats->tx_bytes, stats->tx_packets);
831 		if (len >= char_count) {
832 			spin_unlock_bh(&iface_stat_list_lock);
838 		(*num_items_returned)++;
840 	spin_unlock_bh(&iface_stat_list_lock);
/*
 * Deferred work item: create the per-interface proc directory and its
 * read-only stat files.  Runs from the workqueue because the address
 * notifier chains that trigger it are atomic and cannot call
 * create_proc_read_entry() directly (see iface_alloc()).
 * (kfree(isw) at the end is on a line not visible in this extract.)
 */
846 static void iface_create_proc_worker(struct work_struct *work)
848 	struct proc_dir_entry *proc_entry;
849 	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
851 	struct iface_stat *new_iface = isw->iface_entry;
853 	/* iface_entries are not deleted, so safe to manipulate. */
854 	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
855 	if (IS_ERR_OR_NULL(proc_entry)) {
856 		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
861 	new_iface->proc_ptr = proc_entry;
863 	create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
864 			read_proc_u64, &new_iface->totals[IFS_TX].bytes);
865 	create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
866 			read_proc_u64, &new_iface->totals[IFS_RX].bytes);
867 	create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
868 			read_proc_u64, &new_iface->totals[IFS_TX].packets);
869 	create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
870 			read_proc_u64, &new_iface->totals[IFS_RX].packets);
871 	create_proc_read_entry("active", proc_iface_perms, proc_entry,
872 			read_proc_bool, &new_iface->active);
874 	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
875 		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
880 * Will set the entry's active state, and
881 * update the net_dev accordingly also.
/*
 * Flip an iface_stat between active (holding a net_dev pointer) and
 * inactive (net_dev cleared).  Leading underscore: caller is expected
 * to hold iface_stat_list_lock.  The activate/deactivate branch
 * condition is on a line not visible in this extract.
 */
883 static void _iface_stat_set_active(struct iface_stat *entry,
884 				   struct net_device *net_dev,
888 		entry->net_dev = net_dev;
889 		entry->active = true;
890 		IF_DEBUG("qtaguid: %s(%s): "
891 			 "enable tracking. rfcnt=%d\n", __func__,
893 			 __this_cpu_read(*net_dev->pcpu_refcnt));
895 		entry->active = false;
896 		entry->net_dev = NULL;
897 		IF_DEBUG("qtaguid: %s(%s): "
898 			 "disable tracking. rfcnt=%d\n", __func__,
900 			 __this_cpu_read(*net_dev->pcpu_refcnt));
905 /* Caller must hold iface_stat_list_lock */
/*
 * Allocate and register a new iface_stat for @net_dev, mark it active,
 * and schedule the proc-entry creation on a workqueue (the calling
 * notifier context is atomic — "No create_proc_read_entry() for you!").
 * GFP_ATOMIC throughout for the same reason.  Error paths free the
 * partial allocations (kfree(new_iface) lines not visible here).
 * Caller must hold iface_stat_list_lock.
 */
906 static struct iface_stat *iface_alloc(struct net_device *net_dev)
908 	struct iface_stat *new_iface;
909 	struct iface_stat_work *isw;
911 	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
912 	if (new_iface == NULL) {
913 		pr_err("qtaguid: iface_stat: create(%s): "
914 		       "iface_stat alloc failed\n", net_dev->name);
917 	new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
918 	if (new_iface->ifname == NULL) {
919 		pr_err("qtaguid: iface_stat: create(%s): "
920 		       "ifname alloc failed\n", net_dev->name);
924 	spin_lock_init(&new_iface->tag_stat_list_lock);
925 	new_iface->tag_stat_tree = RB_ROOT;
926 	_iface_stat_set_active(new_iface, net_dev, true);
929 	 * ipv6 notifier chains are atomic :(
930 	 * No create_proc_read_entry() for you!
932 	isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
934 		pr_err("qtaguid: iface_stat: create(%s): "
935 		       "work alloc failed\n", new_iface->ifname);
936 		_iface_stat_set_active(new_iface, net_dev, false);
937 		kfree(new_iface->ifname);
941 	isw->iface_entry = new_iface;
942 	INIT_WORK(&isw->iface_work, iface_create_proc_worker);
943 	schedule_work(&isw->iface_work);
944 	list_add(&new_iface->list, &iface_stat_list);
/*
 * Detect a device whose hardware counters went backwards (e.g. driver
 * reset on ifdown/ifup): if rx or tx byte counts dropped below the
 * stashed last_known values, fold those last_known totals into the
 * module's running totals and invalidate the stash so they are not
 * double-counted.
 */
948 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
949 					       struct iface_stat *iface)
951 	struct rtnl_link_stats64 dev_stats, *stats;
954 	stats = dev_get_stats(net_dev, &dev_stats);
955 	/* No empty packets */
957 		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
958 		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
960 	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
961 		 "bytes rx/tx=%llu/%llu "
962 		 "active=%d last_known=%d "
963 		 "stats_rewound=%d\n", __func__,
964 		 net_dev ? net_dev->name : "?",
966 		 stats->rx_bytes, stats->tx_bytes,
967 		 iface->active, iface->last_known_valid, stats_rewound);
969 	if (iface->active && iface->last_known_valid && stats_rewound) {
970 		pr_warn_once("qtaguid: iface_stat: %s(%s): "
971 			     "iface reset its stats unexpectedly\n", __func__,
974 		iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
975 		iface->totals[IFS_TX].packets +=
976 			iface->last_known[IFS_TX].packets;
977 		iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
978 		iface->totals[IFS_RX].packets +=
979 			iface->last_known[IFS_RX].packets;
980 		iface->last_known_valid = false;
981 		IF_DEBUG("qtaguid: %s(%s): iface=%p "
982 			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
983 			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
984 			 iface->last_known[IFS_TX].bytes);
989 * Create a new entry for tracking the specified interface.
990 * Do nothing if the entry already exists.
991 * Called when an interface is configured with a valid IP address.
/*
 * IPv4 path: (re)activate tracking for @net_dev when it gains a valid
 * address.  When @ifa is not supplied, scans in_dev->ifa_list for an
 * address whose label matches the device name.  Loopback addresses are
 * never tracked.  An existing entry is re-activated (after a stats
 * rewind check) rather than re-created; otherwise iface_alloc() makes
 * a new one.  The in_dev_put() matching in_dev_get() is past the
 * done_unlock_put label (line not visible in this extract).
 */
993 static void iface_stat_create(struct net_device *net_dev,
994 			      struct in_ifaddr *ifa)
996 	struct in_device *in_dev = NULL;
998 	struct iface_stat *entry;
1000 	struct iface_stat *new_iface;
1002 	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1003 		 net_dev ? net_dev->name : "?",
1006 		pr_err("qtaguid: iface_stat: create(): no net dev\n");
1010 	ifname = net_dev->name;
1012 		in_dev = in_dev_get(net_dev);
1014 			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1018 		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1020 		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1021 			IF_DEBUG("qtaguid: iface_stat: create(%s): "
1022 				 "ifa=%p ifa_label=%s\n",
1024 				 ifa->ifa_label ? ifa->ifa_label : "(null)");
1025 			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1031 		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1035 	ipaddr = ifa->ifa_local;
1037 	spin_lock_bh(&iface_stat_list_lock);
1038 	entry = get_iface_entry(ifname);
1039 	if (entry != NULL) {
1040 		bool activate = !ipv4_is_loopback(ipaddr);
1041 		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1043 		iface_check_stats_reset_and_adjust(net_dev, entry);
1044 		_iface_stat_set_active(entry, net_dev, activate);
1045 		IF_DEBUG("qtaguid: %s(%s): "
1046 			 "tracking now %d on ip=%pI4\n", __func__,
1047 			 entry->ifname, activate, &ipaddr);
1048 		goto done_unlock_put;
1049 	} else if (ipv4_is_loopback(ipaddr)) {
1050 		IF_DEBUG("qtaguid: iface_stat: create(%s): "
1051 			 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1052 		goto done_unlock_put;
1055 	new_iface = iface_alloc(net_dev);
1056 	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1057 		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1059 	spin_unlock_bh(&iface_stat_list_lock);
/*
 * IPv6 twin of iface_stat_create(): (re)activate tracking when
 * @net_dev gains an IPv6 address.  Uses ipv6_addr_type() to exclude
 * loopback addresses.  Same re-activate-or-alloc structure under
 * iface_stat_list_lock; the matching in_dev_put() is past the
 * done_unlock_put label (line not visible in this extract).
 */
1065 static void iface_stat_create_ipv6(struct net_device *net_dev,
1066 				   struct inet6_ifaddr *ifa)
1068 	struct in_device *in_dev;
1070 	struct iface_stat *entry;
1071 	struct iface_stat *new_iface;
1074 	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1075 		 ifa, net_dev, net_dev ? net_dev->name : "");
1077 		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1080 	ifname = net_dev->name;
1082 	in_dev = in_dev_get(net_dev);
1084 		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1089 	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1093 		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1097 	addr_type = ipv6_addr_type(&ifa->addr);
1099 	spin_lock_bh(&iface_stat_list_lock);
1100 	entry = get_iface_entry(ifname);
1101 	if (entry != NULL) {
1102 		bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1103 		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1105 		iface_check_stats_reset_and_adjust(net_dev, entry);
1106 		_iface_stat_set_active(entry, net_dev, activate);
1107 		IF_DEBUG("qtaguid: %s(%s): "
1108 			 "tracking now %d on ip=%pI6c\n", __func__,
1109 			 entry->ifname, activate, &ifa->addr);
1110 		goto done_unlock_put;
1111 	} else if (addr_type & IPV6_ADDR_LOOPBACK) {
1112 		IF_DEBUG("qtaguid: %s(%s): "
1113 			 "ignore loopback dev. ip=%pI6c\n", __func__,
1114 			 ifname, &ifa->addr);
1115 		goto done_unlock_put;
1118 	new_iface = iface_alloc(net_dev);
1119 	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1120 		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1123 	spin_unlock_bh(&iface_stat_list_lock);
/* Unlocked ("_nl" = no lock) sock_tag lookup; caller holds sock_tag_list_lock. */
1128 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1130 	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1131 	return sock_tag_tree_search(&sock_tag_tree, sk);
/*
 * Locking wrapper around get_sock_stat_nl(): takes sock_tag_list_lock
 * for the duration of the lookup and returns the found entry (or NULL).
 */
1134 static struct sock_tag *get_sock_stat(const struct sock *sk)
1136 	struct sock_tag *sock_tag_entry;
1137 	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1140 	spin_lock_bh(&sock_tag_list_lock);
1141 	sock_tag_entry = get_sock_stat_nl(sk);
1142 	spin_unlock_bh(&sock_tag_list_lock);
1143 	return sock_tag_entry;
/*
 * Dispatch one packet of @bytes into the TCP / UDP / "other" counter
 * bucket depending on @proto (the switch/case lines are not visible in
 * this extract; each add records exactly 1 packet).
 */
1147 data_counters_update(struct data_counters *dc, int set,
1148 		     enum ifs_tx_rx direction, int proto, int bytes)
1152 		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1155 		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1159 		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1166 * Update stats for the specified interface. Do nothing if the entry
1167 * does not exist (when a device was never configured with an IP address).
1168 * Called when an device is being unregistered.
/*
 * Snapshot or fold device counters for @net_dev into its iface_stat.
 * With @stash_only: just remember the current dev stats in last_known[]
 * (so a later reset can be detected) and return.  Otherwise: add the
 * dev stats into the running totals, invalidate last_known[], and
 * deactivate the entry — this path runs when the device unregisters.
 * Untracked or already-inactive interfaces are no-ops.
 */
1170 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1172 	struct rtnl_link_stats64 dev_stats, *stats;
1173 	struct iface_stat *entry;
1175 	stats = dev_get_stats(net_dev, &dev_stats);
1176 	spin_lock_bh(&iface_stat_list_lock);
1177 	entry = get_iface_entry(net_dev->name);
1178 	if (entry == NULL) {
1179 		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1181 		spin_unlock_bh(&iface_stat_list_lock);
1185 	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1186 		 net_dev->name, entry);
1187 	if (!entry->active) {
1188 		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1190 		spin_unlock_bh(&iface_stat_list_lock);
1195 		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1196 		entry->last_known[IFS_TX].packets = stats->tx_packets;
1197 		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1198 		entry->last_known[IFS_RX].packets = stats->rx_packets;
1199 		entry->last_known_valid = true;
1200 		IF_DEBUG("qtaguid: %s(%s): "
1201 			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1202 			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1203 		spin_unlock_bh(&iface_stat_list_lock);
1206 	entry->totals[IFS_TX].bytes += stats->tx_bytes;
1207 	entry->totals[IFS_TX].packets += stats->tx_packets;
1208 	entry->totals[IFS_RX].bytes += stats->rx_bytes;
1209 	entry->totals[IFS_RX].packets += stats->rx_packets;
1210 	/* We don't need the last_known[] anymore */
1211 	entry->last_known_valid = false;
1212 	_iface_stat_set_active(entry, net_dev, false);
1213 	IF_DEBUG("qtaguid: %s(%s): "
1214 		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1215 		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1216 	spin_unlock_bh(&iface_stat_list_lock);
/*
 * Record @bytes for one packet against @tag_entry in its currently
 * active counter set, and mirror the update into the parent
 * {0, uid_tag} counters when this entry has them (i.e. it is an
 * acct-tagged child).
 */
1219 static void tag_stat_update(struct tag_stat *tag_entry,
1220 			enum ifs_tx_rx direction, int proto, int bytes)
1223 	active_set = get_active_counter_set(tag_entry->tn.tag);
1224 	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1225 		 "dir=%d proto=%d bytes=%d)\n",
1226 		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1227 		 active_set, direction, proto, bytes);
1228 	data_counters_update(&tag_entry->counters, active_set, direction,
1230 	if (tag_entry->parent_counters)
1231 		data_counters_update(tag_entry->parent_counters, active_set,
1232 				     direction, proto, bytes);
1236 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1238 * iface_entry->tag_stat_list_lock should be held.
/*
 * Allocate a zeroed tag_stat for @tag and insert it into
 * @iface_entry's tag_stat_tree.  Returns NULL on allocation failure.
 * Caller holds iface_entry->tag_stat_list_lock (GFP_ATOMIC).
 */
1240 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1243 	struct tag_stat *new_tag_stat_entry = NULL;
1244 	IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1245 		 " (uid=%u)\n", __func__,
1246 		 iface_entry, tag, get_uid_from_tag(tag));
1247 	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1248 	if (!new_tag_stat_entry) {
1249 		pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1252 	new_tag_stat_entry->tn.tag = tag;
1253 	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1255 	return new_tag_stat_entry;
/*
 * Charge one packet to the per-interface tag stats:
 * - resolve the socket's tag (or synthesize {0, uid} for untagged sockets),
 * - update the existing {acct_tag, uid_tag} entry if present,
 * - otherwise create the {0, uid_tag} base entry (if missing) and, for a
 *   non-zero acct_tag, a child entry linked to the base via parent_counters.
 * Runs under iface_entry->tag_stat_list_lock; allocations downstream are
 * GFP_ATOMIC. NOTE(review): several lines (declaration of uid_tag, the
 * "not found" early-return, some braces) are elided in this excerpt.
 */
1258 static void if_tag_stat_update(const char *ifname, uid_t uid,
1259 const struct sock *sk, enum ifs_tx_rx direction,
1260 int proto, int bytes)
1262 struct tag_stat *tag_stat_entry;
1263 tag_t tag, acct_tag;
1265 struct data_counters *uid_tag_counters;
1266 struct sock_tag *sock_tag_entry;
1267 struct iface_stat *iface_entry;
1268 struct tag_stat *new_tag_stat;
1269 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1270 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1271 ifname, uid, sk, direction, proto, bytes);
1274 iface_entry = get_iface_entry(ifname);
1276 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1280 /* It is ok to process data when an iface_entry is inactive */
1282 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1283 ifname, iface_entry);
1286 * Look for a tagged sock.
1287 * It will have an acct_uid.
1289 sock_tag_entry = get_sock_stat(sk);
1290 if (sock_tag_entry) {
1291 tag = sock_tag_entry->tag;
1292 acct_tag = get_atag_from_tag(tag);
1293 uid_tag = get_utag_from_tag(tag);
/* Untagged socket: account against the plain {0, uid} tag. */
1295 acct_tag = make_atag_from_value(0);
1296 tag = combine_atag_with_uid(acct_tag, uid);
1297 uid_tag = make_tag_from_uid(uid);
1299 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1300 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1301 tag, get_uid_from_tag(tag), iface_entry);
1302 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1303 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1305 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1307 if (tag_stat_entry) {
1309 * Updating the {acct_tag, uid_tag} entry handles both stats:
1310 * {0, uid_tag} will also get updated.
1312 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1313 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1317 /* Loop over tag list under this interface for {0,uid_tag} */
1318 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1320 if (!tag_stat_entry) {
1321 /* Here: the base uid_tag did not exist */
1323 * No parent counters. So
1324 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1326 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1327 uid_tag_counters = &new_tag_stat->counters;
1329 uid_tag_counters = &tag_stat_entry->counters;
/* Non-zero acct_tag: create the child entry and link it to the base. */
1333 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1334 new_tag_stat->parent_counters = uid_tag_counters;
1336 tag_stat_update(new_tag_stat, direction, proto, bytes);
1337 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
/*
 * Netdevice notifier: create iface stat tracking on register/up events
 * (elided case labels) and mark stats inactive on down/unregister.
 * No-op when the module is passive. NOTE(review): the return statement
 * and some case labels are elided in this excerpt.
 */
1340 static int iface_netdev_event_handler(struct notifier_block *nb,
1341 unsigned long event, void *ptr) {
1342 struct net_device *dev = ptr;
1344 if (unlikely(module_passive))
1347 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1348 "ev=0x%lx/%s netdev=%p->name=%s\n",
1349 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1353 iface_stat_create(dev, NULL);
1354 atomic64_inc(&qtu_events.iface_events);
1357 case NETDEV_UNREGISTER:
/* On DOWN, also snapshot/disable tracking (stash_only == true for DOWN). */
1358 iface_stat_update(dev, event == NETDEV_DOWN);
1359 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv6 address notifier: when an address appears, start tracking the
 * owning device; on down/unregister, snapshot and deactivate its stats.
 * NOTE(review): switch header, some case labels, break statements and the
 * return are elided in this excerpt.
 */
1365 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1366 unsigned long event, void *ptr)
1368 struct inet6_ifaddr *ifa = ptr;
1369 struct net_device *dev;
1371 if (unlikely(module_passive))
1374 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1375 "ev=0x%lx/%s ifa=%p\n",
1376 event, netdev_evt_str(event), ifa);
1380 BUG_ON(!ifa || !ifa->idev);
1381 dev = (struct net_device *)ifa->idev->dev;
1382 iface_stat_create_ipv6(dev, ifa);
1383 atomic64_inc(&qtu_events.iface_events);
1386 case NETDEV_UNREGISTER:
1387 BUG_ON(!ifa || !ifa->idev);
1388 dev = (struct net_device *)ifa->idev->dev;
1389 iface_stat_update(dev, event == NETDEV_DOWN);
1390 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv4 address notifier: mirror of the IPv6 handler above, using
 * in_ifaddr/ifa_dev to reach the owning net_device.
 * NOTE(review): switch header, some case labels, break statements and the
 * return are elided in this excerpt.
 */
1396 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1397 unsigned long event, void *ptr)
1399 struct in_ifaddr *ifa = ptr;
1400 struct net_device *dev;
1402 if (unlikely(module_passive))
1405 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1406 "ev=0x%lx/%s ifa=%p\n",
1407 event, netdev_evt_str(event), ifa);
1411 BUG_ON(!ifa || !ifa->ifa_dev);
1412 dev = ifa->ifa_dev->dev;
1413 iface_stat_create(dev, ifa);
1414 atomic64_inc(&qtu_events.iface_events);
1417 case NETDEV_UNREGISTER:
1418 BUG_ON(!ifa || !ifa->ifa_dev);
1419 dev = ifa->ifa_dev->dev;
1420 iface_stat_update(dev, event == NETDEV_DOWN);
1421 atomic64_inc(&qtu_events.iface_events);
/* Notifier hooks registered in iface_stat_init(): netdev events plus
 * IPv4/IPv6 address add/remove events. */
1427 static struct notifier_block iface_netdev_notifier_blk = {
1428 .notifier_call = iface_netdev_event_handler,
1431 static struct notifier_block iface_inetaddr_notifier_blk = {
1432 .notifier_call = iface_inetaddr_event_handler,
1435 static struct notifier_block iface_inet6addr_notifier_blk = {
1436 .notifier_call = iface_inet6addr_event_handler,
/*
 * Module-init helper: create the per-interface procfs directory and the
 * stat_all file, then register the netdev and IPv4/IPv6 address notifiers.
 * Cleanup on failure follows the kernel goto-unwind pattern, releasing
 * resources in reverse order of acquisition. NOTE(review): the error-label
 * names between registrations (and some gotos/returns) are elided here.
 */
1439 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1443 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1444 if (!iface_stat_procdir) {
1445 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1450 iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1453 if (!iface_stat_all_procfile) {
1454 pr_err("qtaguid: iface_stat: init "
1455 " failed to create stat_all proc entry\n");
/* Legacy procfs API: attach the read callback directly on the entry. */
1459 iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1462 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1464 pr_err("qtaguid: iface_stat: init "
1465 "failed to register dev event handler\n");
1466 goto err_zap_all_stats_entry;
1468 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1470 pr_err("qtaguid: iface_stat: init "
1471 "failed to register ipv4 dev event handler\n");
1475 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1477 pr_err("qtaguid: iface_stat: init "
1478 "failed to register ipv6 dev event handler\n");
1479 goto err_unreg_ip4_addr;
/* Unwind path: undo registrations/proc entries in reverse order. */
1484 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1486 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1487 err_zap_all_stats_entry:
1488 remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1490 remove_proc_entry(iface_stat_procdirname, parent_procdir);
/*
 * Look up the socket owning this skb via the xt_socket helpers, but only
 * from hooks where those helpers are known to be safe (see
 * XT_SOCKET_SUPPORTED_HOOKS). TCP_TIME_WAIT sockets are dropped because
 * their file pointer is unreliable (see linked thread below).
 * NOTE(review): the declaration of sk, default switch arms and return
 * statements are elided in this excerpt; presumably returns the sk or
 * NULL -- TODO confirm against full source.
 */
1495 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1496 struct xt_action_param *par)
1499 unsigned int hook_mask = (1 << par->hooknum);
1501 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1502 par->hooknum, par->family);
1505 * Let's not abuse the the xt_socket_get*_sk(), or else it will
1506 * return garbage SKs.
1508 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1511 switch (par->family) {
1513 sk = xt_socket_get6_sk(skb, par);
1516 sk = xt_socket_get4_sk(skb, par);
1523 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1524 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1525 * Not fixed in 3.0-r3 :(
1528 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1529 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1530 if (sk->sk_state == TCP_TIME_WAIT) {
/* Drop the reference xt_socket_get*_sk() took before discarding the sk. */
1531 xt_socket_put_sk(sk);
/*
 * Attribute this skb's bytes to the given uid on the device it traversed.
 * Resolves the device from skb->dev, falling back to par->in/par->out, and
 * warns when the two disagree. Direction is inferred from par->in (RX when
 * set, TX otherwise). NOTE(review): several surrounding if/else lines are
 * elided in this excerpt, so the exact branch structure is not visible.
 */
1538 static void account_for_uid(const struct sk_buff *skb,
1539 const struct sock *alternate_sk, uid_t uid,
1540 struct xt_action_param *par)
1542 const struct net_device *el_dev;
1545 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
/* "a ? : b" is the GNU elvis operator: first non-NULL of par->in/par->out. */
1546 el_dev = par->in ? : par->out;
1548 const struct net_device *other_dev;
1550 other_dev = par->in ? : par->out;
1551 if (el_dev != other_dev) {
1552 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1553 "par->(in/out)=%p %s\n",
1554 par->hooknum, el_dev, el_dev->name, other_dev,
1559 if (unlikely(!el_dev)) {
1560 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1561 } else if (unlikely(!el_dev->name)) {
1562 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1564 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
/* Prefer the skb's own socket; fall back to the one found via lookup. */
1569 if_tag_stat_update(el_dev->name, uid,
1570 skb->sk ? skb->sk : alternate_sk,
1571 par->in ? IFS_RX : IFS_TX,
1572 ip_hdr(skb)->protocol, skb->len);
/*
 * The iptables match entry point. Decides whether the packet matches the
 * uid/gid/socket criteria in *info, and (as a side effect, unless a
 * uid-owner match was requested) accounts the packet's bytes to the
 * resolved uid via account_for_uid().
 * Match-result convention here: "(info->match ^ info->invert) == 0" etc.
 * encodes "no failed criteria", honoring per-flag inversion.
 * NOTE(review): many lines are elided in this excerpt (declarations of sk,
 * res, sock_uid; several if-conditions; the final return), so the exact
 * control flow between the visible fragments is partly inferred.
 */
1576 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1578 const struct xt_qtaguid_match_info *info = par->matchinfo;
1579 const struct file *filp;
1580 bool got_sock = false;
/* Passive mode: skip all work, report the "nothing matched" result. */
1585 if (unlikely(module_passive))
1586 return (info->match ^ info->invert) == 0;
1588 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1589 par->hooknum, skb, par->in, par->out, par->family);
1591 atomic64_inc(&qtu_events.match_calls);
1593 res = (info->match ^ info->invert) == 0;
1601 * A missing sk->sk_socket happens when packets are in-flight
1602 * and the matching socket is already closed and gone.
1604 sk = qtaguid_find_sk(skb, par);
1606 * If we got the socket from the find_sk(), we will need to put
1607 * it back, as nf_tproxy_get_sock_v4() got it.
1611 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1613 atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1615 atomic64_inc(&qtu_events.match_found_sk);
1617 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1618 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1620 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1621 par->hooknum, sk, sk->sk_socket,
1622 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1623 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1624 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1625 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1628 if (sk == NULL || sk->sk_socket == NULL) {
1630 * Here, the qtaguid_find_sk() using connection tracking
1631 * couldn't find the owner, so for now we just count them
1632 * against the system.
1635 * TODO: unhack how to force just accounting.
1636 * For now we only do iface stats when the uid-owner is not
/* No owner found: account to uid 0 unless a uid match was requested. */
1639 if (!(info->match & XT_QTAGUID_UID))
1640 account_for_uid(skb, sk, 0, par);
1641 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1643 sk ? sk->sk_socket : NULL);
1644 res = (info->match ^ info->invert) == 0;
1645 atomic64_inc(&qtu_events.match_no_sk);
1646 goto put_sock_ret_res;
1647 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1649 goto put_sock_ret_res;
1651 filp = sk->sk_socket->file;
1653 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1654 account_for_uid(skb, sk, 0, par);
1655 res = ((info->match ^ info->invert) &
1656 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1657 atomic64_inc(&qtu_events.match_no_sk_file);
1658 goto put_sock_ret_res;
/* Owner found: use the socket file's fsuid for accounting and matching. */
1660 sock_uid = filp->f_cred->fsuid;
1662 * TODO: unhack how to force just accounting.
1663 * For now we only do iface stats when the uid-owner is not requested
1665 if (!(info->match & XT_QTAGUID_UID))
1666 account_for_uid(skb, sk, sock_uid, par);
1669 * The following two tests fail the match when:
1670 * id not in range AND no inverted condition requested
1671 * or id in range AND inverted condition requested
1672 * Thus (!a && b) || (a && !b) == a ^ b
1674 if (info->match & XT_QTAGUID_UID)
1675 if ((filp->f_cred->fsuid >= info->uid_min &&
1676 filp->f_cred->fsuid <= info->uid_max) ^
1677 !(info->invert & XT_QTAGUID_UID)) {
1678 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1681 goto put_sock_ret_res;
1683 if (info->match & XT_QTAGUID_GID)
1684 if ((filp->f_cred->fsgid >= info->gid_min &&
1685 filp->f_cred->fsgid <= info->gid_max) ^
1686 !(info->invert & XT_QTAGUID_GID)) {
1687 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1690 goto put_sock_ret_res;
1693 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
/* Common exit: release the sk reference only if find_sk() took one. */
1698 xt_socket_put_sk(sk);
1700 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1705 /* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the module's full internal state (sock tags, uid tag data, proc qtu
 * data, iface stats) to the kernel log when DDEBUG_MASK is enabled.
 * It takes each subsystem's spinlock around the corresponding dump; note
 * sock_tag_list_lock is taken before uid_tag_data_tree_lock -- presumably
 * the module-wide lock ordering; TODO confirm. NOTE(review): kfree() of
 * fmt_buff/buff and the #ifdef/#else/#endif framing are elided here; the
 * final stub line below is the no-op variant for non-debug builds.
 */
1706 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1712 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1715 fmt_buff = kasprintf(GFP_ATOMIC,
1716 "qtaguid: %s(): %s {\n", __func__, fmt);
1718 va_start(args, fmt);
1719 buff = kvasprintf(GFP_ATOMIC,
1722 pr_debug("%s", buff);
1727 spin_lock_bh(&sock_tag_list_lock);
1728 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1729 spin_unlock_bh(&sock_tag_list_lock);
1731 spin_lock_bh(&sock_tag_list_lock);
1732 spin_lock_bh(&uid_tag_data_tree_lock);
1733 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1734 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1735 spin_unlock_bh(&uid_tag_data_tree_lock);
1736 spin_unlock_bh(&sock_tag_list_lock);
1738 spin_lock_bh(&iface_stat_list_lock);
1739 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1740 spin_unlock_bh(&iface_stat_list_lock);
1742 pr_debug("qtaguid: %s(): }\n", __func__);
1745 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
/*
 * Legacy read_proc handler for the ctrl file: emits one line per tagged
 * socket ("sock=... tag=... pid=... f_count=...") followed by a single
 * "events:" summary line of the atomic64 counters. Honors items_to_skip
 * for chunked procfs reads via item_index. NOTE(review): declarations of
 * outp/len/uid/f_count, the passive-mode body, outp/char_count bookkeeping
 * between items, and the final return are elided in this excerpt.
 */
1749  * Procfs reader to get all active socket tags using style "1)" as described in
1752 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1753 off_t items_to_skip, int char_count, int *eof,
1759 struct rb_node *node;
1760 struct sock_tag *sock_tag_entry;
1762 int indent_level = 0;
1765 if (unlikely(module_passive)) {
1773 CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1774 page, items_to_skip, char_count, *eof);
1776 spin_lock_bh(&sock_tag_list_lock);
1777 for (node = rb_first(&sock_tag_tree);
1779 node = rb_next(node)) {
/* Skip entries already returned by a previous chunked read. */
1780 if (item_index++ < items_to_skip)
1782 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1783 uid = get_uid_from_tag(sock_tag_entry->tag);
1784 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1787 sock_tag_entry->tag,
1791 f_count = atomic_long_read(
1792 &sock_tag_entry->socket->file->f_count);
1793 len = snprintf(outp, char_count,
1794 "sock=%p tag=0x%llx (uid=%u) pid=%u "
1797 sock_tag_entry->tag, uid,
1798 sock_tag_entry->pid, f_count);
/* Whole line didn't fit: stop here, caller will re-read with a skip. */
1799 if (len >= char_count) {
1800 spin_unlock_bh(&sock_tag_list_lock);
1806 (*num_items_returned)++;
1808 spin_unlock_bh(&sock_tag_list_lock);
1810 if (item_index++ >= items_to_skip) {
1811 len = snprintf(outp, char_count,
1812 "events: sockets_tagged=%llu "
1813 "sockets_untagged=%llu "
1814 "counter_set_changes=%llu "
1816 "iface_events=%llu "
1818 "match_found_sk=%llu "
1819 "match_found_sk_in_ct=%llu "
1820 "match_found_no_sk_in_ct=%llu "
1822 "match_no_sk_file=%llu\n",
1823 atomic64_read(&qtu_events.sockets_tagged),
1824 atomic64_read(&qtu_events.sockets_untagged),
1825 atomic64_read(&qtu_events.counter_set_changes),
1826 atomic64_read(&qtu_events.delete_cmds),
1827 atomic64_read(&qtu_events.iface_events),
1828 atomic64_read(&qtu_events.match_calls),
1829 atomic64_read(&qtu_events.match_found_sk),
1830 atomic64_read(&qtu_events.match_found_sk_in_ct),
1832 &qtu_events.match_found_no_sk_in_ct),
1833 atomic64_read(&qtu_events.match_no_sk),
1834 atomic64_read(&qtu_events.match_no_sk_file));
1835 if (len >= char_count) {
1841 (*num_items_returned)++;
1844 /* Count the following as part of the last item_index */
1845 if (item_index > items_to_skip) {
1846 prdebug_full_state(indent_level, "proc ctrl");
/*
 * Handle the "d <acct_tag> [<uid>]" ctrl command: remove everything
 * associated with {acct_tag, uid} (acct_tag 0 means "all tags of uid"):
 * 1) tagged sockets (moved to a temp tree; sockfd_put() happens outside
 *    the spinlock via sock_tag_tree_erase()),
 * 2) the uid's counter-set selection,
 * 3) per-interface tag_stat entries,
 * 4) uid_tag_data bookkeeping (tag refs without sock tags are freed).
 * Permission: caller may always delete its own fsuid's tags; deleting for
 * another uid requires can_impersonate_uid(). NOTE(review): declarations
 * (argc/cmd/tag/uid/...), several argc checks, frees and the return value
 * are elided in this excerpt.
 */
1854  * Delete socket tags, and stat tags associated with a given
1855  * accouting tag and uid.
1857 static int ctrl_cmd_delete(const char *input)
1865 struct iface_stat *iface_entry;
1866 struct rb_node *node;
1867 struct sock_tag *st_entry;
1868 struct rb_root st_to_free_tree = RB_ROOT;
1869 struct tag_stat *ts_entry;
1870 struct tag_counter_set *tcs_entry;
1871 struct tag_ref *tr_entry;
1872 struct uid_tag_data *utd_entry;
1874 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1875 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1876 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1882 if (!valid_atag(acct_tag)) {
1883 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
/* uid omitted: operate on the caller's own fsuid. */
1888 uid = current_fsuid();
1889 } else if (!can_impersonate_uid(uid)) {
1890 pr_info("qtaguid: ctrl_delete(%s): "
1891 "insufficient priv from pid=%u tgid=%u uid=%u\n",
1892 input, current->pid, current->tgid, current_fsuid());
1897 tag = combine_atag_with_uid(acct_tag, uid);
1898 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1899 "looking for tag=0x%llx (uid=%u)\n",
1902 /* Delete socket tags */
1903 spin_lock_bh(&sock_tag_list_lock);
1904 node = rb_first(&sock_tag_tree);
/* Advance node before possibly erasing the current entry. */
1906 st_entry = rb_entry(node, struct sock_tag, sock_node);
1907 entry_uid = get_uid_from_tag(st_entry->tag);
1908 node = rb_next(node);
1909 if (entry_uid != uid)
1912 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
1913 input, st_entry->tag, entry_uid);
1915 if (!acct_tag || st_entry->tag == tag) {
1916 rb_erase(&st_entry->sock_node, &sock_tag_tree);
1917 /* Can't sockfd_put() within spinlock, do it later. */
1918 sock_tag_tree_insert(st_entry, &st_to_free_tree);
1919 tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1920 BUG_ON(tr_entry->num_sock_tags <= 0);
1921 tr_entry->num_sock_tags--;
1923 * TODO: remove if, and start failing.
1924 * This is a hack to work around the fact that in some
1925 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
1926 * and are trying to work around apps
1927 * that didn't open the /dev/xt_qtaguid.
1929 if (st_entry->list.next && st_entry->list.prev)
1930 list_del(&st_entry->list);
1933 spin_unlock_bh(&sock_tag_list_lock);
/* Now safe to release socket refs: no spinlock held. */
1935 sock_tag_tree_erase(&st_to_free_tree);
1937 /* Delete tag counter-sets */
1938 spin_lock_bh(&tag_counter_set_list_lock);
1939 /* Counter sets are only on the uid tag, not full tag */
1940 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1942 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1943 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1946 get_uid_from_tag(tcs_entry->tn.tag),
1947 tcs_entry->active_set);
1948 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1951 spin_unlock_bh(&tag_counter_set_list_lock);
1954 * If acct_tag is 0, then all entries belonging to uid are
1957 spin_lock_bh(&iface_stat_list_lock);
1958 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1959 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1960 node = rb_first(&iface_entry->tag_stat_tree);
1962 ts_entry = rb_entry(node, struct tag_stat, tn.node);
1963 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1964 node = rb_next(node);
1966 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1967 "ts tag=0x%llx (uid=%u)\n",
1968 input, ts_entry->tn.tag, entry_uid);
1970 if (entry_uid != uid)
1972 if (!acct_tag || ts_entry->tn.tag == tag) {
1973 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1974 "erase ts: %s 0x%llx %u\n",
1975 input, iface_entry->ifname,
1976 get_atag_from_tag(ts_entry->tn.tag),
1978 rb_erase(&ts_entry->tn.node,
1979 &iface_entry->tag_stat_tree);
1983 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1985 spin_unlock_bh(&iface_stat_list_lock);
1987 /* Cleanup the uid_tag_data */
1988 spin_lock_bh(&uid_tag_data_tree_lock);
1989 node = rb_first(&uid_tag_data_tree);
1991 utd_entry = rb_entry(node, struct uid_tag_data, node);
1992 entry_uid = utd_entry->uid;
1993 node = rb_next(node);
1995 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1999 if (entry_uid != uid)
2002 * Go over the tag_refs, and those that don't have
2003 * sock_tags using them are freed.
2005 put_tag_ref_tree(tag, utd_entry);
2006 put_utd_entry(utd_entry);
2008 spin_unlock_bh(&uid_tag_data_tree_lock);
2010 atomic64_inc(&qtu_events.delete_cmds);
/*
 * Handle the "s <counter_set> <uid>" ctrl command: select which counter
 * set (e.g. foreground/background) subsequent traffic of this uid is
 * charged to. Requires can_manipulate_uids(). Allocates the per-uid
 * tag_counter_set lazily under tag_counter_set_list_lock (hence
 * GFP_ATOMIC). NOTE(review): declarations (argc/cmd/counter_set/uid/tag),
 * argc validation, error returns and the final return are elided here.
 */
2017 static int ctrl_cmd_counter_set(const char *input)
2023 struct tag_counter_set *tcs;
2026 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2027 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2028 "set=%d uid=%u\n", input, argc, cmd,
2034 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2035 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2040 if (!can_manipulate_uids()) {
2041 pr_info("qtaguid: ctrl_counterset(%s): "
2042 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2043 input, current->pid, current->tgid, current_fsuid());
/* Counter sets are keyed by the plain uid tag (acct_tag == 0). */
2048 tag = make_tag_from_uid(uid);
2049 spin_lock_bh(&tag_counter_set_list_lock);
2050 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2052 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2054 spin_unlock_bh(&tag_counter_set_list_lock);
2055 pr_err("qtaguid: ctrl_counterset(%s): "
2056 "failed to alloc counter set\n",
2062 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2063 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2064 "(uid=%u) set=%d\n",
2065 input, tag, get_uid_from_tag(tag), counter_set);
2067 tcs->active_set = counter_set;
2068 spin_unlock_bh(&tag_counter_set_list_lock);
2069 atomic64_inc(&qtu_events.counter_set_changes);
/*
 * Handle the "t <sock_fd> [<acct_tag> [<uid>]]" ctrl command: tag the
 * socket behind sock_fd with {acct_tag, uid}. sockfd_lookup() pins the
 * socket file; on a first tagging that reference is deliberately kept
 * until untag/delete, on a re-tagging the previously held reference is
 * released and the entry's tag is simply replaced.
 * Permission: tagging for another uid requires can_impersonate_uid().
 * NOTE(review): declarations (argc/cmd/sock_fd/uid/full_tag/res), several
 * condition lines, error labels and returns are elided in this excerpt,
 * so the exact error unwinding is only partially visible.
 */
2076 static int ctrl_cmd_tag(const char *input)
2081 tag_t acct_tag = make_atag_from_value(0);
2083 struct socket *el_socket;
2085 struct sock_tag *sock_tag_entry;
2086 struct tag_ref *tag_ref_entry;
2087 struct uid_tag_data *uid_tag_data_entry;
2088 struct proc_qtu_data *pqd_entry;
2090 /* Unassigned args will get defaulted later. */
2091 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2092 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2093 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2099 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2101 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2102 " sock_fd=%d err=%d\n", input, sock_fd, res);
2105 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2106 input, atomic_long_read(&el_socket->file->f_count),
/* acct_tag omitted: default to the zero tag. */
2109 acct_tag = make_atag_from_value(0);
2110 } else if (!valid_atag(acct_tag)) {
2111 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2115 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2116 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2117 "in_group=%d in_egroup=%d\n",
2118 input, current->pid, current->tgid, current_uid(),
2119 current_euid(), current_fsuid(),
2120 in_group_p(proc_ctrl_write_gid),
2121 in_egroup_p(proc_ctrl_write_gid));
2123 uid = current_fsuid();
2124 } else if (!can_impersonate_uid(uid)) {
2125 pr_info("qtaguid: ctrl_tag(%s): "
2126 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2127 input, current->pid, current->tgid, current_fsuid());
2131 full_tag = combine_atag_with_uid(acct_tag, uid);
2133 spin_lock_bh(&sock_tag_list_lock);
2134 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2135 tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2136 if (IS_ERR(tag_ref_entry)) {
2137 res = PTR_ERR(tag_ref_entry);
2138 spin_unlock_bh(&sock_tag_list_lock);
2141 tag_ref_entry->num_sock_tags++;
2142 if (sock_tag_entry) {
2143 struct tag_ref *prev_tag_ref_entry;
2145 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2146 "st@%p ...->f_count=%ld\n",
2147 input, el_socket->sk, sock_tag_entry,
2148 atomic_long_read(&el_socket->file->f_count));
2150 * This is a re-tagging, so release the sock_fd that was
2151 * locked at the time of the 1st tagging.
2152 * There is still the ref from this call's sockfd_lookup() so
2153 * it can be done within the spinlock.
2155 sockfd_put(sock_tag_entry->socket);
2156 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2157 &uid_tag_data_entry);
2158 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2159 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2160 prev_tag_ref_entry->num_sock_tags--;
2161 sock_tag_entry->tag = full_tag;
2163 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2164 input, el_socket->sk);
2165 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2167 if (!sock_tag_entry) {
2168 pr_err("qtaguid: ctrl_tag(%s): "
2169 "socket tag alloc failed\n",
2171 spin_unlock_bh(&sock_tag_list_lock);
2173 goto err_tag_unref_put;
2175 sock_tag_entry->sk = el_socket->sk;
2176 sock_tag_entry->socket = el_socket;
2177 sock_tag_entry->pid = current->tgid;
2178 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2180 spin_lock_bh(&uid_tag_data_tree_lock);
2181 pqd_entry = proc_qtu_data_tree_search(
2182 &proc_qtu_data_tree, current->tgid);
2184 * TODO: remove if, and start failing.
2185 * At first, we want to catch user-space code that is not
2186 * opening the /dev/xt_qtaguid.
2188 if (IS_ERR_OR_NULL(pqd_entry))
2191 "User space forgot to open /dev/xt_qtaguid? "
2192 "pid=%u tgid=%u uid=%u\n", __func__,
2193 current->pid, current->tgid,
2196 list_add(&sock_tag_entry->list,
2197 &pqd_entry->sock_tag_list);
2198 spin_unlock_bh(&uid_tag_data_tree_lock);
2200 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2201 atomic64_inc(&qtu_events.sockets_tagged);
2203 spin_unlock_bh(&sock_tag_list_lock);
2204 /* We keep the ref to the socket (file) until it is untagged */
2205 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2206 input, sock_tag_entry,
2207 atomic_long_read(&el_socket->file->f_count));
/* Error path: drop the tag ref taken above, then release the fd ref. */
2211 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2212 tag_ref_entry->num_sock_tags--;
2213 free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2215 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2216 input, atomic_long_read(&el_socket->file->f_count) - 1);
2217 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2218 sockfd_put(el_socket);
2222 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
/*
 * Handle the "u <sock_fd>" ctrl command: remove the sock_tag entry for
 * the socket behind sock_fd. Releases two references to the socket file:
 * the one held since tag time and the one taken by this call's
 * sockfd_lookup(). No uid permission check: owning the fd is authority
 * enough (see comment below). NOTE(review): declarations
 * (argc/cmd/sock_fd/res), argc validation, error returns and the final
 * return are elided in this excerpt.
 */
2226 static int ctrl_cmd_untag(const char *input)
2230 struct socket *el_socket;
2232 struct sock_tag *sock_tag_entry;
2233 struct tag_ref *tag_ref_entry;
2234 struct uid_tag_data *utd_entry;
2235 struct proc_qtu_data *pqd_entry;
2237 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2238 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2239 input, argc, cmd, sock_fd);
2244 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2246 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2247 " sock_fd=%d err=%d\n", input, sock_fd, res);
2250 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2251 input, atomic_long_read(&el_socket->file->f_count),
2253 spin_lock_bh(&sock_tag_list_lock);
2254 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2255 if (!sock_tag_entry) {
2256 spin_unlock_bh(&sock_tag_list_lock);
2261 * The socket already belongs to the current process
2262 * so it can do whatever it wants to it.
2264 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2266 tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2267 BUG_ON(!tag_ref_entry);
2268 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2269 spin_lock_bh(&uid_tag_data_tree_lock);
2270 pqd_entry = proc_qtu_data_tree_search(
2271 &proc_qtu_data_tree, current->tgid);
2273 * TODO: remove if, and start failing.
2274 * At first, we want to catch user-space code that is not
2275 * opening the /dev/xt_qtaguid.
2277 if (IS_ERR_OR_NULL(pqd_entry))
2278 pr_warn_once("qtaguid: %s(): "
2279 "User space forgot to open /dev/xt_qtaguid? "
2280 "pid=%u tgid=%u uid=%u\n", __func__,
2281 current->pid, current->tgid, current_fsuid());
2283 list_del(&sock_tag_entry->list);
2284 spin_unlock_bh(&uid_tag_data_tree_lock);
2286 * We don't free tag_ref from the utd_entry here,
2287 * only during a cmd_delete().
2289 tag_ref_entry->num_sock_tags--;
2290 spin_unlock_bh(&sock_tag_list_lock);
2292 * Release the sock_fd that was grabbed at tag time,
2293 * and once more for the sockfd_lookup() here.
2295 sockfd_put(sock_tag_entry->socket);
2296 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2297 input, sock_tag_entry,
2298 atomic_long_read(&el_socket->file->f_count) - 1);
2299 sockfd_put(el_socket);
2301 kfree(sock_tag_entry);
2302 atomic64_inc(&qtu_events.sockets_untagged);
/* Error path below: only the lookup's own fd reference is released. */
2307 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2308 input, atomic_long_read(&el_socket->file->f_count) - 1);
2309 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2310 sockfd_put(el_socket);
2314 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
/*
 * Dispatch a ctrl-file command line by its first character:
 * 'd' delete, 's' counter set, 't' tag, 'u' untag.
 * NOTE(review): the switch header, case labels, default arm and return
 * are elided in this excerpt; presumably returns count on success or a
 * negative errno -- TODO confirm against full source.
 */
2318 static int qtaguid_ctrl_parse(const char *input, int count)
2324 /* Collect params for commands */
2327 res = ctrl_cmd_delete(input);
2331 res = ctrl_cmd_counter_set(input);
2335 res = ctrl_cmd_tag(input);
2339 res = ctrl_cmd_untag(input);
2349 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
/* Upper bound on a ctrl command line, including the NUL terminator. */
2353 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
/*
 * Legacy write_proc handler for the ctrl file: copy the user buffer into
 * a bounded stack buffer, NUL-terminate it, and hand it to the parser.
 * The ">= MAX" check guarantees room for the terminator written below.
 * NOTE(review): the error-return lines after each check are elided in
 * this excerpt.
 */
2354 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2355 unsigned long count, void *data)
2357 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2359 if (unlikely(module_passive))
2362 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2365 if (copy_from_user(input_buf, buffer, count))
2368 input_buf[count] = '\0';
2369 return qtaguid_ctrl_parse(input_buf, count);
/*
 * Cursor/state shared between qtaguid_stats_proc_read(), pp_sets() and
 * pp_stats_line() while walking iface -> tag_stat -> counter-set.
 * NOTE(review): additional members (outp, char_count, item_index,
 * items_to_skip) are elided in this excerpt but referenced by the
 * printing helpers below.
 */
2372 struct proc_print_info {
2374 char **num_items_returned;
2375 struct iface_stat *iface_entry;
2376 struct tag_stat *ts_entry;
/*
 * Format one stats line into ppi->outp: the column header when
 * ppi->item_index is 0, otherwise one data row for the current
 * {iface, tag, cnt_set}. Rows for other uids are suppressed unless the
 * caller passes can_read_other_uid_stats(). Returns the snprintf length
 * (0 for skipped items, per the elided returns -- TODO confirm).
 * NOTE(review): the len declaration, skip-returns and the final return
 * are elided in this excerpt.
 */
2382 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2385 struct data_counters *cnts;
2387 if (!ppi->item_index) {
2388 if (ppi->item_index++ < ppi->items_to_skip)
2390 len = snprintf(ppi->outp, ppi->char_count,
2391 "idx iface acct_tag_hex uid_tag_int cnt_set "
2392 "rx_bytes rx_packets "
2393 "tx_bytes tx_packets "
2394 "rx_tcp_bytes rx_tcp_packets "
2395 "rx_udp_bytes rx_udp_packets "
2396 "rx_other_bytes rx_other_packets "
2397 "tx_tcp_bytes tx_tcp_packets "
2398 "tx_udp_bytes tx_udp_packets "
2399 "tx_other_bytes tx_other_packets\n");
2401 tag_t tag = ppi->ts_entry->tn.tag;
2402 uid_t stat_uid = get_uid_from_tag(tag);
/* Silently hide other uids' rows from unprivileged readers. */
2404 if (!can_read_other_uid_stats(stat_uid)) {
2405 CT_DEBUG("qtaguid: stats line: "
2406 "%s 0x%llx %u: insufficient priv "
2407 "from pid=%u tgid=%u uid=%u\n",
2408 ppi->iface_entry->ifname,
2409 get_atag_from_tag(tag), stat_uid,
2410 current->pid, current->tgid, current_fsuid());
2413 if (ppi->item_index++ < ppi->items_to_skip)
2415 cnts = &ppi->ts_entry->counters;
2417 ppi->outp, ppi->char_count,
2418 "%d %s 0x%llx %u %u "
2428 ppi->iface_entry->ifname,
2429 get_atag_from_tag(tag),
2432 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2433 dc_sum_packets(cnts, cnt_set, IFS_RX),
2434 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2435 dc_sum_packets(cnts, cnt_set, IFS_TX),
2436 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2437 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2438 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2439 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2440 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2441 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2442 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2443 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2444 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2445 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2446 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2447 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
/*
 * Emit one line per counter set for the current ppi->ts_entry. Returns
 * false when the output buffer can no longer hold a whole line (the
 * caller then stops and reports the partial page); presumably returns
 * true after all sets are emitted -- the closing lines are elided here.
 */
2452 static bool pp_sets(struct proc_print_info *ppi)
2456 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2458 len = pp_stats_line(ppi, counter_set);
2459 if (len >= ppi->char_count) {
/* Line fit: advance the cursor and count the returned item. */
2465 ppi->char_count -= len;
2466 (*ppi->num_items_returned)++;
/*
 * Legacy read_proc handler for the stats file: prints the header line,
 * then walks every iface's tag_stat tree emitting all counter sets per
 * tag via pp_sets(). Returns the number of bytes written to the page;
 * a full buffer aborts the walk mid-way and the chunked-read protocol
 * (items_to_skip) resumes it. NOTE(review): ppi.outp/item_index setup
 * lines and the passive-mode return are elided in this excerpt.
 */
2473  * Procfs reader to get all tag stats using style "1)" as described in
2475  * Groups all protocols tx/rx bytes.
2477 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2478 off_t items_to_skip, int char_count, int *eof,
2481 struct proc_print_info ppi;
2486 ppi.char_count = char_count;
2487 ppi.num_items_returned = num_items_returned;
2488 ppi.items_to_skip = items_to_skip;
2490 if (unlikely(module_passive)) {
2491 len = pp_stats_line(&ppi, 0);
2492 /* The header should always be shorter than the buffer. */
2493 BUG_ON(len >= ppi.char_count);
2494 (*num_items_returned)++;
2499 CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
2500 "char_count=%d *eof=%d\n", page, *num_items_returned,
2501 items_to_skip, char_count, *eof);
2506 /* The idx is there to help debug when things go belly up. */
2507 len = pp_stats_line(&ppi, 0);
2508 /* Don't advance the outp unless the whole line was printed */
2509 if (len >= ppi.char_count) {
2511 return ppi.outp - page;
2515 ppi.char_count -= len;
2516 (*num_items_returned)++;
/* Nested locking: list lock outer, per-iface tag_stat lock inner. */
2519 spin_lock_bh(&iface_stat_list_lock);
2520 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2521 struct rb_node *node;
2522 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2523 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2525 node = rb_next(node)) {
2526 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2527 if (!pp_sets(&ppi)) {
2529 &ppi.iface_entry->tag_stat_list_lock);
2530 spin_unlock_bh(&iface_stat_list_lock);
2531 return ppi.outp - page;
2534 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2536 spin_unlock_bh(&iface_stat_list_lock);
2539 return ppi.outp - page;
2542 /*------------------------------------------*/
/*
 * open() handler for /dev/xt_qtaguid: create (or reuse) the per-uid
 * uid_tag_data and attach a new per-tgid proc_qtu_data, stored in
 * file->private_data for qtudev_release() to clean up. A second open by
 * the same tgid is rejected. On failure, a uid_tag_data that was created
 * by this call (utd_entry_found == false) is rolled back.
 * NOTE(review): res initialization, some error-path lines (including the
 * kfree of utd_entry) and returns are elided in this excerpt.
 */
2543 static int qtudev_open(struct inode *inode, struct file *file)
2545 struct uid_tag_data *utd_entry;
2546 struct proc_qtu_data *pqd_entry;
2547 struct proc_qtu_data *new_pqd_entry;
2549 bool utd_entry_found;
2551 if (unlikely(qtu_proc_handling_passive))
2554 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2555 current->pid, current->tgid, current_fsuid());
2557 spin_lock_bh(&uid_tag_data_tree_lock);
2559 /* Look for existing uid data, or alloc one. */
2560 utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2561 if (IS_ERR_OR_NULL(utd_entry)) {
2562 res = PTR_ERR(utd_entry);
2566 /* Look for existing PID based proc_data */
2567 pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2570 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2571 "%s already opened\n",
2572 current->pid, current->tgid, current_fsuid(),
2575 goto err_unlock_free_utd;
/* GFP_ATOMIC: uid_tag_data_tree_lock (spinlock) is held. */
2578 new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2579 if (!new_pqd_entry) {
2580 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2581 "proc data alloc failed\n",
2582 current->pid, current->tgid, current_fsuid());
2584 goto err_unlock_free_utd;
2586 new_pqd_entry->pid = current->tgid;
2587 INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2588 new_pqd_entry->parent_tag_data = utd_entry;
2589 utd_entry->num_pqd++;
2591 proc_qtu_data_tree_insert(new_pqd_entry,
2592 &proc_qtu_data_tree);
2594 spin_unlock_bh(&uid_tag_data_tree_lock);
2595 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2596 current_fsuid(), new_pqd_entry);
2597 file->private_data = new_pqd_entry;
2600 err_unlock_free_utd:
/* Only roll back a utd_entry this open() created, never a reused one. */
2601 if (!utd_entry_found) {
2602 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2605 spin_unlock_bh(&uid_tag_data_tree_lock);
/*
 * release() handler for the qtu misc device.
 * Undoes qtudev_open(): for every socket the process tagged, drops the
 * tag reference, unlinks the sock_tag from the global tree, and defers
 * the sockfd_put() (done by sock_tag_tree_erase()) until after the
 * spinlocks are released.  Finally drops the pqd's reference on its
 * parent uid_tag_data.
 * NOTE(review): some lines are elided in this extract; comments describe
 * only visible code.
 */
2610 static int qtudev_release(struct inode *inode, struct file *file)
2612 struct proc_qtu_data *pqd_entry = file->private_data;
2613 struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
2614 struct sock_tag *st_entry;
/* Collects sock_tags whose sockfd_put() must happen outside the locks. */
2615 struct rb_root st_to_free_tree = RB_ROOT;
2616 struct list_head *entry, *next;
2619 if (unlikely(qtu_proc_handling_passive))
2623 * Do not trust the current->pid, it might just be a kworker cleaning
2624 * up after a dead proc.
2626 DR_DEBUG("qtaguid: qtudev_release(): "
2627 "pid=%u tgid=%u uid=%u "
2628 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2629 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2630 pqd_entry, pqd_entry->pid, utd_entry,
2631 utd_entry->num_active_tags);
/* Lock order: sock_tag_list_lock, then uid_tag_data_tree_lock. */
2633 spin_lock_bh(&sock_tag_list_lock);
2634 spin_lock_bh(&uid_tag_data_tree_lock);
2636 list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2637 st_entry = list_entry(entry, struct sock_tag, list);
2638 DR_DEBUG("qtaguid: %s(): "
2639 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2641 st_entry, st_entry->sk,
2642 current->pid, current->tgid,
2643 pqd_entry->parent_tag_data->uid);
/*
 * The tag's uid may differ from the opener's uid, so re-resolve
 * the uid_tag_data that actually owns this tag.
 */
2645 utd_entry = uid_tag_data_tree_search(
2647 get_uid_from_tag(st_entry->tag));
2648 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2649 DR_DEBUG("qtaguid: %s(): "
2650 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2651 st_entry->tag, utd_entry);
2652 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
/* Drop this socket's reference on the tag; free the ref if unused. */
2655 BUG_ON(tr->num_sock_tags <= 0);
2656 tr->num_sock_tags--;
2657 free_tag_ref_from_utd_entry(tr, utd_entry);
2659 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2660 list_del(&st_entry->list);
2661 /* Can't sockfd_put() within spinlock, do it later. */
2662 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2665 * Try to free the utd_entry if no other proc_qtu_data is
2666 * using it (num_pqd is 0) and it doesn't have active tags
2667 * (num_active_tags is 0).
2669 put_utd_entry(utd_entry);
/* Unregister this process's pqd and drop its ref on the uid entry. */
2672 rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2673 BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2674 pqd_entry->parent_tag_data->num_pqd--;
2675 put_utd_entry(pqd_entry->parent_tag_data);
2677 file->private_data = NULL;
2679 spin_unlock_bh(&uid_tag_data_tree_lock);
2680 spin_unlock_bh(&sock_tag_list_lock);
/* Now safe to sockfd_put() the deferred sock_tags. */
2683 sock_tag_tree_erase(&st_to_free_tree);
2685 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2686 current->pid, current->tgid);
2690 /*------------------------------------------*/
/* File operations for the qtu misc device; no read/write, only lifecycle. */
2691 static const struct file_operations qtudev_fops = {
2692 .owner = THIS_MODULE,
2693 .open = qtudev_open,
2694 .release = qtudev_release,
/* Misc char device (dynamic minor) through which processes register. */
2697 static struct miscdevice qtu_device = {
2698 .minor = MISC_DYNAMIC_MINOR,
2699 .name = QTU_DEV_NAME,
2700 .fops = &qtudev_fops,
2701 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2704 /*------------------------------------------*/
/*
 * Creates the procfs hierarchy: proc/net/xt_qtaguid/ with "ctrl"
 * (read/write) and "stats" (read-only) entries, honoring the permission
 * module params.  Uses goto-style cleanup: each failure removes the
 * entries created before it.
 * NOTE(review): cleanup labels and returns are partially elided in this
 * extract.
 */
2705 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2708 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2709 if (!*res_procdir) {
2710 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2715 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2717 if (!xt_qtaguid_ctrl_file) {
2718 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2723 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2724 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2726 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2728 if (!xt_qtaguid_stats_file) {
2729 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2732 goto no_stats_entry;
/* "stats" is read-only; counter modification is a TODO (see below). */
2734 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2736 * TODO: add support counter hacking
2737 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
/* Error unwind: remove entries in reverse creation order. */
2742 remove_proc_entry("ctrl", *res_procdir);
2744 remove_proc_entry("xt_qtaguid", NULL);
/* xtables match registration; NFPROTO_UNSPEC covers both IPv4 and IPv6. */
2749 static struct xt_match qtaguid_mt_reg __read_mostly = {
2751 * This module masquerades as the "owner" module so that iptables
2752 * tools can deal with it.
2756 .family = NFPROTO_UNSPEC,
2757 .match = qtaguid_mt,
2758 .matchsize = sizeof(struct xt_qtaguid_match_info),
/*
 * Module init: procfs entries, iface stat tracking, xtables match, and
 * the qtu misc device.  Short-circuit || means setup stops at the first
 * failure.  NOTE(review): earlier steps are not rolled back on a later
 * failure in the visible code — partial-failure cleanup is elided or
 * absent in this extract.
 */
2762 static int __init qtaguid_mt_init(void)
2764 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2765 || iface_stat_init(xt_qtaguid_procdir)
2766 || xt_register_match(&qtaguid_mt_reg)
2767 || misc_register(&qtu_device))
2773 * TODO: allow unloading of the module.
2774 * For now stats are permanent.
2775 * Kconfig forces'y/n' and never an 'm'.
/* No module_exit: stats are permanent and the module cannot be unloaded. */
2778 module_init(qtaguid_mt_init);
2779 MODULE_AUTHOR("jpa <jpa@google.com>");
2780 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2781 MODULE_LICENSE("GPL");
/* Aliases let iptables load this module when "owner"/"qtaguid" is used. */
2782 MODULE_ALIAS("ipt_owner");
2783 MODULE_ALIAS("ip6t_owner");
2784 MODULE_ALIAS("ipt_qtaguid");
2785 MODULE_ALIAS("ip6t_qtaguid");