2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
29 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
30 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #include <linux/netfilter/xt_socket.h>
34 #include "xt_qtaguid_internal.h"
35 #include "xt_qtaguid_print.h"
38 * We only use the xt_socket funcs within a similar context to avoid unexpected
41 #define XT_SOCKET_SUPPORTED_HOOKS \
42 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
45 static const char *module_procdirname = "xt_qtaguid";
46 static struct proc_dir_entry *xt_qtaguid_procdir;
48 static unsigned int proc_iface_perms = S_IRUGO;
49 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
51 static struct proc_dir_entry *xt_qtaguid_stats_file;
52 static unsigned int proc_stats_perms = S_IRUGO;
53 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
55 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
56 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
57 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
59 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
61 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
63 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
64 #include <linux/android_aid.h>
65 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
66 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
68 /* 0 means, don't limit anybody */
69 static gid_t proc_stats_readall_gid;
70 static gid_t proc_ctrl_write_gid;
72 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
74 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
78 * Limit the number of active tags (via socket tags) for a given UID.
79 * Multiple processes could share the UID.
81 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
82 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
85 * After the kernel has initialized this module, it is still possible
87 * Setting passive to Y:
88 * - the iface stats handling will not act on notifications.
89 * - iptables matches will never match.
90 * - ctrl commands silently succeed.
91 * - stats are always empty.
92 * This is mostly useful when a bug is suspected.
94 static bool module_passive;
95 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
98 * Control how qtaguid data is tracked per proc/uid.
99 * Setting tag_tracking_passive to Y:
100 * - don't create proc specific structs to track tags
101 * - don't check that active tag stats exceed some limits.
102 * - don't clean up socket tags on process exits.
103 * This is mostly useful when a bug is suspected.
105 static bool qtu_proc_handling_passive;
106 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
109 #define QTU_DEV_NAME "xt_qtaguid"
111 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
112 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname = "iface_stat";
116 static struct proc_dir_entry *iface_stat_procdir;
117 static const char *iface_stat_all_procfilename = "iface_stat_all";
118 static struct proc_dir_entry *iface_stat_all_procfile;
123 * iface_stat_list_lock
126 * uid_tag_data_tree_lock
127 * tag_counter_set_list_lock
128 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
131 * Call tree with all lock holders as of 2011-09-25:
133 * iface_stat_all_proc_read()
134 * iface_stat_list_lock
135 * (struct iface_stat)
137 * qtaguid_ctrl_proc_read()
140 * (struct proc_qtu_data->sock_tag_list)
141 * prdebug_full_state()
144 * uid_tag_data_tree_lock
145 * (uid_tag_data_tree)
146 * (proc_qtu_data_tree)
147 * iface_stat_list_lock
149 * qtaguid_stats_proc_read()
150 * iface_stat_list_lock
151 * struct iface_stat->tag_stat_list_lock
154 * uid_tag_data_tree_lock
157 * sock_tag_data_list_lock
158 * uid_tag_data_tree_lock
159 * prdebug_full_state()
161 * uid_tag_data_tree_lock
162 * iface_stat_list_lock
164 * iface_netdev_event_handler()
165 * iface_stat_create()
166 * iface_stat_list_lock
167 * iface_stat_update()
168 * iface_stat_list_lock
170 * iface_inetaddr_event_handler()
171 * iface_stat_create()
172 * iface_stat_list_lock
173 * iface_stat_update()
174 * iface_stat_list_lock
176 * iface_inet6addr_event_handler()
177 * iface_stat_create_ipv6()
178 * iface_stat_list_lock
179 * iface_stat_update()
180 * iface_stat_list_lock
184 * if_tag_stat_update()
187 * struct iface_stat->tag_stat_list_lock
189 * get_active_counter_set()
190 * tag_counter_set_list_lock
192 * get_active_counter_set()
193 * tag_counter_set_list_lock
196 * qtaguid_ctrl_parse()
199 * tag_counter_set_list_lock
200 * iface_stat_list_lock
201 * struct iface_stat->tag_stat_list_lock
202 * uid_tag_data_tree_lock
203 * ctrl_cmd_counter_set()
204 * tag_counter_set_list_lock
209 * uid_tag_data_tree_lock
210 * (uid_tag_data_tree)
211 * uid_tag_data_tree_lock
212 * (proc_qtu_data_tree)
215 * uid_tag_data_tree_lock
218 static LIST_HEAD(iface_stat_list);
219 static DEFINE_SPINLOCK(iface_stat_list_lock);
221 static struct rb_root sock_tag_tree = RB_ROOT;
222 static DEFINE_SPINLOCK(sock_tag_list_lock);
224 static struct rb_root tag_counter_set_tree = RB_ROOT;
225 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
227 static struct rb_root uid_tag_data_tree = RB_ROOT;
228 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
230 static struct rb_root proc_qtu_data_tree = RB_ROOT;
231 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
233 static struct qtaguid_event_counts qtu_events;
234 /*----------------------------------------------*/
/*
 * Permission helpers for the qtaguid ctrl/stats proc interfaces.
 * can_manipulate_uids(): true for fsuid 0 (root), when no control-write
 * gid limit is configured (proc_ctrl_write_gid == 0), or when the caller's
 * effective groups contain proc_ctrl_write_gid.
 * NOTE(review): this extract omits some original lines (braces etc.);
 * code text left byte-identical.
 */
235 static bool can_manipulate_uids(void)
238 return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
239 || in_egroup_p(proc_ctrl_write_gid);
/* Caller may tag sockets on behalf of `uid` if it *is* that uid, or has
 * the broader manipulation rights above. */
242 static bool can_impersonate_uid(uid_t uid)
244 return uid == current_fsuid() || can_manipulate_uids();
/* Reading another uid's stats: allowed for root, the uid itself, when no
 * readall gid is configured, or membership in proc_stats_readall_gid. */
247 static bool can_read_other_uid_stats(uid_t uid)
250 return unlikely(!current_fsuid()) || uid == current_fsuid()
251 || unlikely(!proc_stats_readall_gid)
252 || in_egroup_p(proc_stats_readall_gid);
/*
 * Accumulate `bytes`/`packets` into the per-set, per-direction,
 * per-protocol counter bucket of a data_counters struct.
 */
255 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
256 enum ifs_tx_rx direction,
257 enum ifs_proto ifs_proto,
261 counters->bpc[set][direction][ifs_proto].bytes += bytes;
262 counters->bpc[set][direction][ifs_proto].packets += packets;
/* Sum of byte counters across TCP, UDP and "other" protocols for one
 * set/direction. */
265 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
267 enum ifs_tx_rx direction)
269 return counters->bpc[set][direction][IFS_TCP].bytes
270 + counters->bpc[set][direction][IFS_UDP].bytes
271 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
/* Same as dc_sum_bytes() but for the packet counters. */
274 static inline uint64_t dc_sum_packets(struct data_counters *counters,
276 enum ifs_tx_rx direction)
278 return counters->bpc[set][direction][IFS_TCP].packets
279 + counters->bpc[set][direction][IFS_UDP].packets
280 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
/*
 * Standard rbtree lookup: walk from the root comparing the wanted tag
 * against each node's tag via tag_compare(), descending left/right.
 * Returns the matching tag_node (return path not visible in this extract).
 * NOTE(review): the loop header and return statements are among the lines
 * missing from this extract.
 */
283 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
285 struct rb_node *node = root->rb_node;
288 struct tag_node *data = rb_entry(node, struct tag_node, node);
290 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
291 " node=%p data=%p\n", tag, node, data);
292 result = tag_compare(tag, data->tag);
293 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
294 " data.tag=0x%llx (uid=%u) res=%d\n",
295 tag, data->tag, get_uid_from_tag(data->tag), result);
297 node = node->rb_left;
299 node = node->rb_right;
/*
 * Standard rbtree insertion for a tag_node keyed by its tag:
 * find the leaf position with tag_compare(), then link and recolor.
 */
306 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
308 struct rb_node **new = &(root->rb_node), *parent = NULL;
310 /* Figure out where to put new node */
312 struct tag_node *this = rb_entry(*new, struct tag_node,
314 int result = tag_compare(data->tag, this->tag);
315 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
316 " (uid=%u)\n", __func__,
318 get_uid_from_tag(this->tag));
321 new = &((*new)->rb_left);
323 new = &((*new)->rb_right);
328 /* Add new node and rebalance tree. */
329 rb_link_node(&data->node, parent, new);
330 rb_insert_color(&data->node, root);
/*
 * Thin typed wrappers around tag_node_tree_insert()/tag_node_tree_search()
 * for the three structs that embed a tag_node (`tn`) as their rbtree key:
 * tag_stat, tag_counter_set and tag_ref. Each search converts the found
 * tag_node back to the containing struct with rb_entry().
 */
333 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
335 tag_node_tree_insert(&data->tn, root);
338 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
340 struct tag_node *node = tag_node_tree_search(root, tag);
343 return rb_entry(&node->node, struct tag_stat, tn.node);
346 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
347 struct rb_root *root)
349 tag_node_tree_insert(&data->tn, root);
352 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
355 struct tag_node *node = tag_node_tree_search(root, tag);
358 return rb_entry(&node->node, struct tag_counter_set, tn.node);
362 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
364 tag_node_tree_insert(&data->tn, root);
367 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
369 struct tag_node *node = tag_node_tree_search(root, tag);
372 return rb_entry(&node->node, struct tag_ref, tn.node);
/*
 * rbtree of sock_tag entries keyed by raw socket pointer value (`sk`),
 * ordered with plain pointer comparison.
 */
375 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
376 const struct sock *sk)
378 struct rb_node *node = root->rb_node;
381 struct sock_tag *data = rb_entry(node, struct sock_tag,
384 node = node->rb_left;
385 else if (sk > data->sk)
386 node = node->rb_right;
/* Insert a sock_tag at the leaf slot determined by pointer comparison,
 * then rebalance. */
393 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
395 struct rb_node **new = &(root->rb_node), *parent = NULL;
397 /* Figure out where to put new node */
399 struct sock_tag *this = rb_entry(*new, struct sock_tag,
402 if (data->sk < this->sk)
403 new = &((*new)->rb_left);
404 else if (data->sk > this->sk)
405 new = &((*new)->rb_right);
410 /* Add new node and rebalance tree. */
411 rb_link_node(&data->sock_node, parent, new);
412 rb_insert_color(&data->sock_node, root);
/*
 * Drain a temporary tree of sock_tag entries: iterate in-order (fetching
 * rb_next() *before* erasing the current node), remove each entry and
 * drop the socket reference taken when it was tagged (sockfd_put).
 * NOTE(review): the kfree of st_entry is presumably among the lines
 * missing from this extract — confirm against the full source.
 */
415 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
417 struct rb_node *node;
418 struct sock_tag *st_entry;
420 node = rb_first(st_to_free_tree);
422 st_entry = rb_entry(node, struct sock_tag, sock_node);
423 node = rb_next(node);
424 CT_DEBUG("qtaguid: %s(): "
425 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
428 get_uid_from_tag(st_entry->tag));
429 rb_erase(&st_entry->sock_node, st_to_free_tree);
430 sockfd_put(st_entry->socket);
/*
 * rbtree of per-process qtu data keyed by pid, ordered by integer
 * comparison. Search and insert follow the standard kernel rbtree idiom.
 */
435 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
438 struct rb_node *node = root->rb_node;
441 struct proc_qtu_data *data = rb_entry(node,
442 struct proc_qtu_data,
445 node = node->rb_left;
446 else if (pid > data->pid)
447 node = node->rb_right;
/* Insert keyed by data->pid, then link and recolor. */
454 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
455 struct rb_root *root)
457 struct rb_node **new = &(root->rb_node), *parent = NULL;
459 /* Figure out where to put new node */
461 struct proc_qtu_data *this = rb_entry(*new,
462 struct proc_qtu_data,
465 if (data->pid < this->pid)
466 new = &((*new)->rb_left);
467 else if (data->pid > this->pid)
468 new = &((*new)->rb_right);
473 /* Add new node and rebalance tree. */
474 rb_link_node(&data->node, parent, new);
475 rb_insert_color(&data->node, root);
/*
 * rbtree of per-uid tag data keyed by uid. Caller is expected to hold
 * uid_tag_data_tree_lock (see the lock-ordering comment earlier in the
 * file).
 */
478 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
479 struct rb_root *root)
481 struct rb_node **new = &(root->rb_node), *parent = NULL;
483 /* Figure out where to put new node */
485 struct uid_tag_data *this = rb_entry(*new,
489 if (data->uid < this->uid)
490 new = &((*new)->rb_left);
491 else if (data->uid > this->uid)
492 new = &((*new)->rb_right);
497 /* Add new node and rebalance tree. */
498 rb_link_node(&data->node, parent, new);
499 rb_insert_color(&data->node, root);
/* Lookup by uid using the same ordering as the insert above. */
502 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
505 struct rb_node *node = root->rb_node;
508 struct uid_tag_data *data = rb_entry(node,
512 node = node->rb_left;
513 else if (uid > data->uid)
514 node = node->rb_right;
522  * Allocates a new uid_tag_data struct if needed.
523  * Returns a pointer to the found or allocated uid_tag_data.
524  * Returns a PTR_ERR on failures, and lock is not held.
525  * If found is not NULL:
526  * sets *found to true if not allocated.
527  * sets *found to false if allocated.
529 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
531 struct uid_tag_data *utd_entry;
533 /* Look for top level uid_tag_data for the UID */
534 utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
535 DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
/* NOTE(review): assigning a pointer into a bool* target — relies on
 * implicit pointer-to-bool conversion; confirm against full source. */
538 *found_res = utd_entry;
/* Not found: allocate a fresh entry. GFP_ATOMIC because callers hold
 * spinlocks (uid_tag_data_tree_lock). */
542 utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
544 pr_err("qtaguid: get_uid_data(%u): "
545 "tag data alloc failed\n", uid);
546 return ERR_PTR(-ENOMEM);
549 utd_entry->uid = uid;
550 utd_entry->tag_ref_tree = RB_ROOT;
551 uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
552 DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
556 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
/*
 * Allocate a tag_ref for `new_tag` under the given uid's data, enforcing
 * the per-uid active-tag quota (max_sock_tags module param). Bumps
 * utd_entry->num_active_tags and inserts the new ref into the uid's
 * tag_ref_tree. num_sock_tags is left for the caller to manage.
 */
557 static struct tag_ref *new_tag_ref(tag_t new_tag,
558 struct uid_tag_data *utd_entry)
560 struct tag_ref *tr_entry;
/* Quota check before allocating anything. */
563 if (utd_entry->num_active_tags + 1 > max_sock_tags) {
564 pr_info("qtaguid: new_tag_ref(0x%llx): "
565 "tag ref alloc quota exceeded. max=%d\n",
566 new_tag, max_sock_tags);
/* GFP_ATOMIC: called with uid_tag_data_tree_lock held (see get_tag_ref). */
572 tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
574 pr_err("qtaguid: new_tag_ref(0x%llx): "
575 "tag ref alloc failed\n",
580 tr_entry->tn.tag = new_tag;
581 /* tr_entry->num_sock_tags handled by caller */
582 utd_entry->num_active_tags++;
583 tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
584 DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
585 " inserted new tag ref %p\n",
/*
 * Resolve full_tag to its tag_ref (if any) under the owning uid's data.
 * Extracts the uid from the tag, fetches/creates the uid_tag_data via
 * get_uid_data(), then searches that uid's tag_ref_tree. On success
 * *utd_res is set to the uid's data entry; the tag_ref may be NULL if the
 * tag has never been referenced.
 */
593 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
594 struct uid_tag_data **utd_res)
596 struct uid_tag_data *utd_entry;
597 struct tag_ref *tr_entry;
599 uid_t uid = get_uid_from_tag(full_tag);
601 DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
604 utd_entry = get_uid_data(uid, &found_utd);
605 if (IS_ERR_OR_NULL(utd_entry)) {
607 *utd_res = utd_entry;
611 tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
613 *utd_res = utd_entry;
614 DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
615 full_tag, utd_entry, tr_entry);
619 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
/*
 * Find-or-create wrapper around lookup_tag_ref()/new_tag_ref(), done
 * under uid_tag_data_tree_lock (bh-disabled spinlock). BUG_ON enforces
 * lookup_tag_ref's contract that utd_entry is valid on return.
 */
620 static struct tag_ref *get_tag_ref(tag_t full_tag,
621 struct uid_tag_data **utd_res)
623 struct uid_tag_data *utd_entry;
624 struct tag_ref *tr_entry;
626 DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
628 spin_lock_bh(&uid_tag_data_tree_lock);
629 tr_entry = lookup_tag_ref(full_tag, &utd_entry);
630 BUG_ON(IS_ERR_OR_NULL(utd_entry));
632 tr_entry = new_tag_ref(full_tag, utd_entry);
634 spin_unlock_bh(&uid_tag_data_tree_lock);
636 *utd_res = utd_entry;
637 DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
638 full_tag, utd_entry, tr_entry);
642 /* Checks and maybe frees the UID Tag Data entry */
/*
 * Frees utd_entry only when nothing references it anymore: no tag refs
 * left in its tag_ref_tree and no proc_qtu_data users (num_pqd == 0).
 * Otherwise logs what still pins it. BUG_ONs check internal consistency
 * between the tree emptiness and the num_active_tags counter.
 */
643 static void put_utd_entry(struct uid_tag_data *utd_entry)
645 /* Are we done with the UID tag data entry? */
646 if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
647 !utd_entry->num_pqd) {
648 DR_DEBUG("qtaguid: %s(): "
649 "erase utd_entry=%p uid=%u "
650 "by pid=%u tgid=%u uid=%u\n", __func__,
651 utd_entry, utd_entry->uid,
652 current->pid, current->tgid, current_fsuid());
653 BUG_ON(utd_entry->num_active_tags);
654 rb_erase(&utd_entry->node, &uid_tag_data_tree);
657 DR_DEBUG("qtaguid: %s(): "
658 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
659 __func__, utd_entry, utd_entry->num_active_tags,
661 BUG_ON(!(utd_entry->num_active_tags ||
662 utd_entry->num_pqd));
667  * If no sock_tags are using this tag_ref,
668  * decrements refcount of utd_entry, removes tr_entry
669  * from utd_entry->tag_ref_tree and frees.
/*
 * Only acts when the ref is unused (num_sock_tags == 0); otherwise the
 * entry is left in place. BUG_ON guards the num_active_tags bookkeeping.
 */
671 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
672 struct uid_tag_data *utd_entry)
674 DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
675 tr_entry, tr_entry->tn.tag,
676 get_uid_from_tag(tr_entry->tn.tag));
677 if (!tr_entry->num_sock_tags) {
678 BUG_ON(!utd_entry->num_active_tags);
679 utd_entry->num_active_tags--;
680 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
681 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
/*
 * Walk the uid's tag_ref_tree releasing refs: if full_tag carries no
 * acct tag (acct_tag == 0) every unused ref of the uid is released,
 * otherwise only the ref matching full_tag exactly. rb_next() is taken
 * before the entry may be erased by free_tag_ref_from_utd_entry().
 */
686 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
688 struct rb_node *node;
689 struct tag_ref *tr_entry;
692 DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
693 full_tag, get_uid_from_tag(full_tag));
694 acct_tag = get_atag_from_tag(full_tag);
695 node = rb_first(&utd_entry->tag_ref_tree);
697 tr_entry = rb_entry(node, struct tag_ref, tn.node);
698 node = rb_next(node);
699 if (!acct_tag || tr_entry->tn.tag == full_tag)
700 free_tag_ref_from_utd_entry(tr_entry, utd_entry);
/*
 * Legacy procfs read handler (pre-seq_file API): prints a u64 pointed to
 * by `data` followed by a newline, and sets *eof when everything fits in
 * `count`.
 */
704 static int read_proc_u64(char *page, char **start, off_t off,
705 int count, int *eof, void *data)
710 uint64_t *iface_entry = data;
715 value = *iface_entry;
716 p += sprintf(p, "%llu\n", value);
717 len = (p - page) - off;
718 *eof = (len <= count) ? 1 : 0;
/* Same pattern for a bool value (printed as %u). */
723 static int read_proc_bool(char *page, char **start, off_t off,
724 int count, int *eof, void *data)
729 bool *bool_entry = data;
735 p += sprintf(p, "%u\n", value);
736 len = (p - page) - off;
737 *eof = (len <= count) ? 1 : 0;
/*
 * Return the active counter set for a tag. Only uid-level tags carry an
 * active set for now, so the acct part is stripped first
 * (get_utag_from_tag). Looked up under tag_counter_set_list_lock; when
 * no tag_counter_set exists the default (presumably set 0, zero-init)
 * is returned — confirm against full source.
 */
742 static int get_active_counter_set(tag_t tag)
745 struct tag_counter_set *tcs;
747 MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
749 tag, get_uid_from_tag(tag));
750 /* For now we only handle UID tags for active sets */
751 tag = get_utag_from_tag(tag);
752 spin_lock_bh(&tag_counter_set_list_lock);
753 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
755 active_set = tcs->active_set;
756 spin_unlock_bh(&tag_counter_set_list_lock);
761  * Find the entry for tracking the specified interface.
762  * Caller must hold iface_stat_list_lock
764 static struct iface_stat *get_iface_entry(const char *ifname)
766 struct iface_stat *iface_entry;
768 /* Find the entry for tracking the specified tag within the interface */
/* NULL device name is rejected (logged) rather than crashing on strcmp. */
769 if (ifname == NULL) {
770 pr_info("qtaguid: iface_stat: get() NULL device name\n");
774 /* Iterate over interfaces */
/* Linear scan of iface_stat_list matching on the interface name. */
775 list_for_each_entry(iface_entry, &iface_stat_list, list) {
776 if (!strcmp(ifname, iface_entry->ifname))
/*
 * /proc read handler dumping one line per tracked interface:
 * the module-accumulated totals (rx/tx bytes+packets) followed by the
 * device's current dev_get_stats() numbers (zeros when the iface is
 * inactive, via no_dev_stats). Supports item skipping for chunked
 * procfs reads. Returns early doing nothing when module_passive is set.
 */
784 static int iface_stat_all_proc_read(char *page, char **num_items_returned,
785 off_t items_to_skip, int char_count,
786 int *eof, void *data)
791 struct iface_stat *iface_entry;
792 struct rtnl_link_stats64 dev_stats, *stats;
793 struct rtnl_link_stats64 no_dev_stats = {0};
795 if (unlikely(module_passive)) {
800 CT_DEBUG("qtaguid:proc iface_stat_all "
801 "page=%p *num_items_returned=%p off=%ld "
802 "char_count=%d *eof=%d\n", page, *num_items_returned,
803 items_to_skip, char_count, *eof);
809  * This lock will prevent iface_stat_update() from changing active,
810  * and in turn prevent an interface from unregistering itself.
812 spin_lock_bh(&iface_stat_list_lock);
813 list_for_each_entry(iface_entry, &iface_stat_list, list) {
814 if (item_index++ < items_to_skip)
817 if (iface_entry->active) {
818 stats = dev_get_stats(iface_entry->net_dev,
821 stats = &no_dev_stats;
823 len = snprintf(outp, char_count,
825 "%llu %llu %llu %llu "
826 "%llu %llu %llu %llu\n",
829 iface_entry->totals[IFS_RX].bytes,
830 iface_entry->totals[IFS_RX].packets,
831 iface_entry->totals[IFS_TX].bytes,
832 iface_entry->totals[IFS_TX].packets,
833 stats->rx_bytes, stats->rx_packets,
834 stats->tx_bytes, stats->tx_packets);
/* Output buffer full: stop here; procfs will call again with a skip. */
835 if (len >= char_count) {
836 spin_unlock_bh(&iface_stat_list_lock);
842 (*num_items_returned)++;
844 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Deferred-work handler that creates the per-interface /proc directory
 * and its read-only stat entries. Runs from the workqueue because the
 * notifier contexts that trigger iface creation are atomic and proc
 * entry creation may sleep (see comment in iface_alloc()).
 */
850 static void iface_create_proc_worker(struct work_struct *work)
852 struct proc_dir_entry *proc_entry;
853 struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
855 struct iface_stat *new_iface = isw->iface_entry;
857 /* iface_entries are not deleted, so safe to manipulate. */
858 proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
859 if (IS_ERR_OR_NULL(proc_entry)) {
860 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
865 new_iface->proc_ptr = proc_entry;
/* One read-only file per counter, all served by the generic
 * read_proc_u64()/read_proc_bool() handlers. */
867 create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
868 read_proc_u64, &new_iface->totals[IFS_TX].bytes);
869 create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
870 read_proc_u64, &new_iface->totals[IFS_RX].bytes);
871 create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
872 read_proc_u64, &new_iface->totals[IFS_TX].packets);
873 create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
874 read_proc_u64, &new_iface->totals[IFS_RX].packets);
875 create_proc_read_entry("active", proc_iface_perms, proc_entry,
876 read_proc_bool, &new_iface->active);
878 IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
879 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
884  * Will set the entry's active state, and
885  * update the net_dev accordingly also.
/*
 * Activate: record the net_dev pointer and mark active.
 * Deactivate: mark inactive and clear the net_dev pointer.
 * NOTE(review): the activate/deactivate branch condition and any
 * dev_hold/dev_put calls are among the lines missing from this extract.
 */
887 static void _iface_stat_set_active(struct iface_stat *entry,
888 struct net_device *net_dev,
892 entry->net_dev = net_dev;
893 entry->active = true;
894 IF_DEBUG("qtaguid: %s(%s): "
895 "enable tracking. rfcnt=%d\n", __func__,
897 __this_cpu_read(*net_dev->pcpu_refcnt))
899 entry->active = false;
900 entry->net_dev = NULL;
901 IF_DEBUG("qtaguid: %s(%s): "
902 "disable tracking. rfcnt=%d\n", __func__,
904 __this_cpu_read(*net_dev->pcpu_refcnt));
909 /* Caller must hold iface_stat_list_lock */
/*
 * Allocate and register tracking state for a net_device: iface_stat
 * struct, duplicated ifname, empty tag_stat tree, marked active, then
 * added to iface_stat_list. All allocations are GFP_ATOMIC since the
 * list lock is held and notifier context may be atomic. The /proc
 * entries are created later from a workqueue (iface_create_proc_worker).
 */
910 static struct iface_stat *iface_alloc(struct net_device *net_dev)
912 struct iface_stat *new_iface;
913 struct iface_stat_work *isw;
915 new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
916 if (new_iface == NULL) {
917 pr_err("qtaguid: iface_stat: create(%s): "
918 "iface_stat alloc failed\n", net_dev->name);
921 new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
922 if (new_iface->ifname == NULL) {
923 pr_err("qtaguid: iface_stat: create(%s): "
924 "ifname alloc failed\n", net_dev->name);
928 spin_lock_init(&new_iface->tag_stat_list_lock);
929 new_iface->tag_stat_tree = RB_ROOT;
930 _iface_stat_set_active(new_iface, net_dev, true);
933  * ipv6 notifier chains are atomic :(
934  * No create_proc_read_entry() for you!
936 isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
/* Work-item alloc failed: undo activation and free everything. */
938 pr_err("qtaguid: iface_stat: create(%s): "
939 "work alloc failed\n", new_iface->ifname);
940 _iface_stat_set_active(new_iface, net_dev, false);
941 kfree(new_iface->ifname);
945 isw->iface_entry = new_iface;
946 INIT_WORK(&isw->iface_work, iface_create_proc_worker);
947 schedule_work(&isw->iface_work);
948 list_add(&new_iface->list, &iface_stat_list);
/*
 * Detect an interface that reset its hardware/driver counters (current
 * dev stats are *lower* than the last stashed values) and, if so, fold
 * the stashed last_known[] byte/packet counts into totals[] so no
 * traffic is lost across the reset. last_known_valid is cleared once
 * consumed.
 */
952 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
953 struct iface_stat *iface)
955 struct rtnl_link_stats64 dev_stats, *stats;
958 stats = dev_get_stats(net_dev, &dev_stats);
959 /* No empty packets */
/* Rewound when either direction's current byte count dropped below the
 * stash. */
961 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
962 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
964 IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
965 "bytes rx/tx=%llu/%llu "
966 "active=%d last_known=%d "
967 "stats_rewound=%d\n", __func__,
968 net_dev ? net_dev->name : "?",
970 stats->rx_bytes, stats->tx_bytes,
971 iface->active, iface->last_known_valid, stats_rewound);
973 if (iface->active && iface->last_known_valid && stats_rewound) {
974 pr_warn_once("qtaguid: iface_stat: %s(%s): "
975 "iface reset its stats unexpectedly\n", __func__,
978 iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
979 iface->totals[IFS_TX].packets +=
980 iface->last_known[IFS_TX].packets;
981 iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
982 iface->totals[IFS_RX].packets +=
983 iface->last_known[IFS_RX].packets;
984 iface->last_known_valid = false;
985 IF_DEBUG("qtaguid: %s(%s): iface=%p "
986 "used last known bytes rx/tx=%llu/%llu\n", __func__,
987 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
988 iface->last_known[IFS_TX].bytes);
993  * Create a new entry for tracking the specified interface.
994  * Do nothing if the entry already exists.
995  * Called when an interface is configured with a valid IP address.
/*
 * IPv4 path (notifier-driven): if no ifa was passed, scan the device's
 * in_device ifa_list for an address whose label matches the device name.
 * Existing entries are (re)activated — unless the address is loopback —
 * after checking for counter resets; otherwise a fresh iface_stat is
 * allocated. Loopback-only devices are never tracked.
 */
997 static void iface_stat_create(struct net_device *net_dev,
998 struct in_ifaddr *ifa)
1000 struct in_device *in_dev = NULL;
1002 struct iface_stat *entry;
1004 struct iface_stat *new_iface;
1006 IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1007 net_dev ? net_dev->name : "?",
1010 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1014 ifname = net_dev->name;
1016 in_dev = in_dev_get(net_dev);
1018 pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1022 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
/* No ifa supplied: find one matching the interface label. */
1024 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1025 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1026 "ifa=%p ifa_label=%s\n",
1028 ifa->ifa_label ? ifa->ifa_label : "(null)");
1029 if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1035 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1039 ipaddr = ifa->ifa_local;
1041 spin_lock_bh(&iface_stat_list_lock);
1042 entry = get_iface_entry(ifname);
1043 if (entry != NULL) {
/* Already tracked: refresh totals vs. possible counter reset, then
 * toggle active based on whether the new address is loopback. */
1044 bool activate = !ipv4_is_loopback(ipaddr);
1045 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1047 iface_check_stats_reset_and_adjust(net_dev, entry);
1048 _iface_stat_set_active(entry, net_dev, activate);
1049 IF_DEBUG("qtaguid: %s(%s): "
1050 "tracking now %d on ip=%pI4\n", __func__,
1051 entry->ifname, activate, &ipaddr);
1052 goto done_unlock_put;
1053 } else if (ipv4_is_loopback(ipaddr)) {
1054 IF_DEBUG("qtaguid: iface_stat: create(%s): "
1055 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1056 goto done_unlock_put;
1059 new_iface = iface_alloc(net_dev);
1060 IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1061 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1063 spin_unlock_bh(&iface_stat_list_lock);
/*
 * IPv6 counterpart of iface_stat_create(): same existing-entry refresh /
 * loopback-skip / allocate logic, but the loopback decision comes from
 * ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LOOPBACK instead of the ifa
 * label scan.
 */
1069 static void iface_stat_create_ipv6(struct net_device *net_dev,
1070 struct inet6_ifaddr *ifa)
1072 struct in_device *in_dev;
1074 struct iface_stat *entry;
1075 struct iface_stat *new_iface;
1078 IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1079 ifa, net_dev, net_dev ? net_dev->name : "");
1081 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1084 ifname = net_dev->name;
1086 in_dev = in_dev_get(net_dev);
1088 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1093 IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1097 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1101 addr_type = ipv6_addr_type(&ifa->addr);
1103 spin_lock_bh(&iface_stat_list_lock);
1104 entry = get_iface_entry(ifname);
1105 if (entry != NULL) {
1106 bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1107 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1109 iface_check_stats_reset_and_adjust(net_dev, entry);
1110 _iface_stat_set_active(entry, net_dev, activate);
1111 IF_DEBUG("qtaguid: %s(%s): "
1112 "tracking now %d on ip=%pI6c\n", __func__,
1113 entry->ifname, activate, &ifa->addr);
1114 goto done_unlock_put;
1115 } else if (addr_type & IPV6_ADDR_LOOPBACK) {
1116 IF_DEBUG("qtaguid: %s(%s): "
1117 "ignore loopback dev. ip=%pI6c\n", __func__,
1118 ifname, &ifa->addr);
1119 goto done_unlock_put;
1122 new_iface = iface_alloc(net_dev);
1123 IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1124 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1127 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Lock-free ("_nl" = no lock) lookup of a socket's sock_tag; caller must
 * hold sock_tag_list_lock.
 */
1132 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1134 MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1135 return sock_tag_tree_search(&sock_tag_tree, sk);
/* Locked wrapper: takes sock_tag_list_lock around the lookup. */
1138 static struct sock_tag *get_sock_stat(const struct sock *sk)
1140 struct sock_tag *sock_tag_entry;
1141 MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1144 spin_lock_bh(&sock_tag_list_lock);
1145 sock_tag_entry = get_sock_stat_nl(sk);
1146 spin_unlock_bh(&sock_tag_list_lock);
1147 return sock_tag_entry;
/*
 * Dispatch one packet's byte count into the right protocol bucket
 * (TCP / UDP / other) of the data_counters, each as 1 packet.
 * NOTE(review): the switch/case lines on `proto` are among the lines
 * missing from this extract.
 */
1151 data_counters_update(struct data_counters *dc, int set,
1152 enum ifs_tx_rx direction, int proto, int bytes)
1156 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1159 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1163 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1170  * Update stats for the specified interface. Do nothing if the entry
1171  * does not exist (when a device was never configured with an IP address).
1172  * Called when an device is being unregistered.
/*
 * Two modes under iface_stat_list_lock:
 *  - stash_only: snapshot current dev stats into last_known[] (used to
 *    survive a later counter reset, see
 *    iface_check_stats_reset_and_adjust()).
 *  - otherwise: fold current dev stats into totals[], invalidate the
 *    stash, and deactivate tracking for the disappearing device.
 */
1174 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1176 struct rtnl_link_stats64 dev_stats, *stats;
1177 struct iface_stat *entry;
1179 stats = dev_get_stats(net_dev, &dev_stats);
1180 spin_lock_bh(&iface_stat_list_lock);
1181 entry = get_iface_entry(net_dev->name);
1182 if (entry == NULL) {
1183 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1185 spin_unlock_bh(&iface_stat_list_lock);
1189 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1190 net_dev->name, entry);
1191 if (!entry->active) {
1192 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1194 spin_unlock_bh(&iface_stat_list_lock);
1199 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1200 entry->last_known[IFS_TX].packets = stats->tx_packets;
1201 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1202 entry->last_known[IFS_RX].packets = stats->rx_packets;
1203 entry->last_known_valid = true;
1204 IF_DEBUG("qtaguid: %s(%s): "
1205 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1206 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1207 spin_unlock_bh(&iface_stat_list_lock);
1210 entry->totals[IFS_TX].bytes += stats->tx_bytes;
1211 entry->totals[IFS_TX].packets += stats->tx_packets;
1212 entry->totals[IFS_RX].bytes += stats->rx_bytes;
1213 entry->totals[IFS_RX].packets += stats->rx_packets;
1214 /* We don't need the last_known[] anymore */
1215 entry->last_known_valid = false;
1216 _iface_stat_set_active(entry, net_dev, false);
1217 IF_DEBUG("qtaguid: %s(%s): "
1218 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1219 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1220 spin_unlock_bh(&iface_stat_list_lock);
/*
 * Update a tag_stat entry's own counters (in the active counter set for
 * its tag), and mirror the update into the parent {0, uid} counters when
 * this entry has parent_counters linked.
 */
1223 static void tag_stat_update(struct tag_stat *tag_entry,
1224 enum ifs_tx_rx direction, int proto, int bytes)
1227 active_set = get_active_counter_set(tag_entry->tn.tag);
1228 MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1229 "dir=%d proto=%d bytes=%d)\n",
1230 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1231 active_set, direction, proto, bytes);
1232 data_counters_update(&tag_entry->counters, active_set, direction,
1234 if (tag_entry->parent_counters)
1235 data_counters_update(tag_entry->parent_counters, active_set,
1236 direction, proto, bytes);
1240  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1242  * iface_entry->tag_stat_list_lock should be held.
/*
 * Allocates (GFP_ATOMIC, since the tag_stat_list_lock spinlock is held)
 * and inserts a zeroed tag_stat keyed by `tag` into the interface's
 * tag_stat_tree. Returns NULL on allocation failure.
 */
1244 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1247 struct tag_stat *new_tag_stat_entry = NULL;
1248 IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1249 " (uid=%u)\n", __func__,
1250 iface_entry, tag, get_uid_from_tag(tag));
1251 new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1252 if (!new_tag_stat_entry) {
1253 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1256 new_tag_stat_entry->tn.tag = tag;
1257 tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1259 return new_tag_stat_entry;
/*
 * Account "bytes" of traffic in "direction" on interface "ifname"
 * against the tag attached to "sk" (if the socket was tagged), falling
 * back to the plain uid tag otherwise.  Looks up / lazily creates both
 * the {acct_tag, uid} entry and its {0, uid} parent in the interface's
 * tag_stat tree, then updates the counters under tag_stat_list_lock.
 * NOTE(review): truncated view -- the iface_entry NULL check body, some
 * else branches, closing braces and a few argument lines are missing.
 */
1262 static void if_tag_stat_update(const char *ifname, uid_t uid,
1263 const struct sock *sk, enum ifs_tx_rx direction,
1264 int proto, int bytes)
1266 struct tag_stat *tag_stat_entry;
1267 tag_t tag, acct_tag;
1269 struct data_counters *uid_tag_counters;
1270 struct sock_tag *sock_tag_entry;
1271 struct iface_stat *iface_entry;
1272 struct tag_stat *new_tag_stat = NULL;
1273 MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1274 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1275 ifname, uid, sk, direction, proto, bytes);
1278 iface_entry = get_iface_entry(ifname);
1280 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1284 /* It is ok to process data when an iface_entry is inactive */
1286 MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1287 ifname, iface_entry);
1290 * Look for a tagged sock.
1291 * It will have an acct_uid.
1293 sock_tag_entry = get_sock_stat(sk);
1294 if (sock_tag_entry) {
/* Tagged socket: split its tag into acct and uid components. */
1295 tag = sock_tag_entry->tag;
1296 acct_tag = get_atag_from_tag(tag);
1297 uid_tag = get_utag_from_tag(tag);
/* Untagged socket (else branch; brace missing in view): use acct_tag 0. */
1299 acct_tag = make_atag_from_value(0);
1300 tag = combine_atag_with_uid(acct_tag, uid);
1301 uid_tag = make_tag_from_uid(uid);
1303 MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1304 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1305 tag, get_uid_from_tag(tag), iface_entry);
1306 /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1307 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1309 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1311 if (tag_stat_entry) {
1313 * Updating the {acct_tag, uid_tag} entry handles both stats:
1314 * {0, uid_tag} will also get updated.
1316 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1317 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1321 /* Loop over tag list under this interface for {0,uid_tag} */
1322 tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1324 if (!tag_stat_entry) {
1325 /* Here: the base uid_tag did not exist */
1327 * No parent counters. So
1328 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1330 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1331 uid_tag_counters = &new_tag_stat->counters;
1333 uid_tag_counters = &tag_stat_entry->counters;
1337 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1338 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1339 new_tag_stat->parent_counters = uid_tag_counters;
1342 * For new_tag_stat to be still NULL here would require:
1343 * {0, uid_tag} exists
1344 * and {acct_tag, uid_tag} doesn't exist
1345 * AND acct_tag == 0.
1346 * Impossible. This reassures us that new_tag_stat
1347 * below will always be assigned.
1349 BUG_ON(!new_tag_stat);
1351 tag_stat_update(new_tag_stat, direction, proto, bytes);
1352 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
/*
 * Netdevice notifier callback: create iface_stat tracking on device
 * registration and snapshot/disable it on DOWN/UNREGISTER.  No-op when
 * the module runs passive.  NOTE(review): the switch statement and the
 * NETDEV_UP/NETDEV_DOWN case labels are missing from this truncated
 * view; only the case bodies are visible.
 */
1355 static int iface_netdev_event_handler(struct notifier_block *nb,
1356 unsigned long event, void *ptr) {
1357 struct net_device *dev = ptr;
1359 if (unlikely(module_passive))
1362 IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1363 "ev=0x%lx/%s netdev=%p->name=%s\n",
1364 event, netdev_evt_str(event), dev, dev ? dev->name : "");
1368 iface_stat_create(dev, NULL);
1369 atomic64_inc(&qtu_events.iface_events);
1372 case NETDEV_UNREGISTER:
1373 iface_stat_update(dev, event == NETDEV_DOWN);
1374 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv6 address notifier callback: mirrors the netdev handler but keys
 * off inet6_ifaddr events, deriving the net_device from ifa->idev->dev.
 * Creates IPv6 iface stats on address-up and updates/deactivates on
 * DOWN/UNREGISTER.  NOTE(review): switch/case-label lines are missing
 * from this truncated view.
 */
1380 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1381 unsigned long event, void *ptr)
1383 struct inet6_ifaddr *ifa = ptr;
1384 struct net_device *dev;
1386 if (unlikely(module_passive))
1389 IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1390 "ev=0x%lx/%s ifa=%p\n",
1391 event, netdev_evt_str(event), ifa);
1395 BUG_ON(!ifa || !ifa->idev);
1396 dev = (struct net_device *)ifa->idev->dev;
1397 iface_stat_create_ipv6(dev, ifa);
1398 atomic64_inc(&qtu_events.iface_events);
1401 case NETDEV_UNREGISTER:
1402 BUG_ON(!ifa || !ifa->idev);
1403 dev = (struct net_device *)ifa->idev->dev;
1404 iface_stat_update(dev, event == NETDEV_DOWN);
1405 atomic64_inc(&qtu_events.iface_events);
/*
 * IPv4 address notifier callback: same shape as the IPv6 handler, but
 * the device comes from ifa->ifa_dev->dev.  Creates iface stats on
 * address-up, updates/deactivates on DOWN/UNREGISTER, counts each event.
 * NOTE(review): switch/case-label lines are missing from this
 * truncated view.
 */
1411 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1412 unsigned long event, void *ptr)
1414 struct in_ifaddr *ifa = ptr;
1415 struct net_device *dev;
1417 if (unlikely(module_passive))
1420 IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1421 "ev=0x%lx/%s ifa=%p\n",
1422 event, netdev_evt_str(event), ifa);
1426 BUG_ON(!ifa || !ifa->ifa_dev);
1427 dev = ifa->ifa_dev->dev;
1428 iface_stat_create(dev, ifa);
1429 atomic64_inc(&qtu_events.iface_events);
1432 case NETDEV_UNREGISTER:
1433 BUG_ON(!ifa || !ifa->ifa_dev);
1434 dev = ifa->ifa_dev->dev;
1435 iface_stat_update(dev, event == NETDEV_DOWN);
1436 atomic64_inc(&qtu_events.iface_events);
/* Notifier blocks wiring the three handlers above into the kernel's
 * netdevice / IPv4-address / IPv6-address notification chains; they are
 * registered in iface_stat_init(). */
1442 static struct notifier_block iface_netdev_notifier_blk = {
1443 .notifier_call = iface_netdev_event_handler,
1446 static struct notifier_block iface_inetaddr_notifier_blk = {
1447 .notifier_call = iface_inetaddr_event_handler,
1450 static struct notifier_block iface_inet6addr_notifier_blk = {
1451 .notifier_call = iface_inet6addr_event_handler,
/*
 * One-time setup of interface statistics: creates the iface_stat proc
 * directory and the "all" read-only proc file, then registers the
 * netdev, IPv4-address and IPv6-address notifiers.  Unwinds with
 * goto-labelled cleanup in reverse order on any failure.
 * NOTE(review): truncated view -- several error-path lines (err
 * assignments, some goto labels, return statements) are missing.
 */
1454 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1458 iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1459 if (!iface_stat_procdir) {
1460 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1465 iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1468 if (!iface_stat_all_procfile) {
1469 pr_err("qtaguid: iface_stat: init "
1470 " failed to create stat_all proc entry\n");
1474 iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
1477 err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1479 pr_err("qtaguid: iface_stat: init "
1480 "failed to register dev event handler\n");
1481 goto err_zap_all_stats_entry;
1483 err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1485 pr_err("qtaguid: iface_stat: init "
1486 "failed to register ipv4 dev event handler\n");
1490 err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1492 pr_err("qtaguid: iface_stat: init "
1493 "failed to register ipv6 dev event handler\n");
1494 goto err_unreg_ip4_addr;
/* Cleanup ladder: undo registrations/proc entries in reverse order. */
1499 unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1501 unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1502 err_zap_all_stats_entry:
1503 remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1505 remove_proc_entry(iface_stat_procdirname, parent_procdir);
/*
 * Resolve the owning struct sock for an skb via the xt_socket helpers,
 * but only in PRE_ROUTING/LOCAL_IN hooks (XT_SOCKET_SUPPORTED_HOOKS)
 * where those helpers are safe.  TCP_TIME_WAIT sockets are released and
 * treated as "not found" because their file pointer is unreliable (see
 * the linked kerneltrap thread).  NOTE(review): truncated view -- the
 * family case labels, NULL-check braces and return statements are
 * missing.
 */
1510 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1511 struct xt_action_param *par)
1514 unsigned int hook_mask = (1 << par->hooknum);
1516 MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1517 par->hooknum, par->family);
1520 * Let's not abuse the the xt_socket_get*_sk(), or else it will
1521 * return garbage SKs.
1523 if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1526 switch (par->family) {
1528 sk = xt_socket_get6_sk(skb, par);
1531 sk = xt_socket_get4_sk(skb, par);
1538 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1539 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1540 * Not fixed in 3.0-r3 :(
1543 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1544 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1545 if (sk->sk_state == TCP_TIME_WAIT) {
1546 xt_socket_put_sk(sk);
/*
 * Return the transport-layer protocol number of the skb: for IPv6 it
 * walks extension headers with ipv6_find_hdr(); for IPv4 it reads
 * ip_hdr()->protocol; otherwise it falls back to IPPROTO_RAW.
 * NOTE(review): truncated view -- case labels, braces and the final
 * return are missing.
 */
1553 static int ipx_proto(const struct sk_buff *skb,
1554 struct xt_action_param *par)
1556 int thoff = 0, tproto;
1558 switch (par->family) {
1560 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1562 MT_DEBUG("%s(): transport header not found in ipv6"
1563 " skb=%p\n", __func__, skb);
1566 tproto = ip_hdr(skb)->protocol;
1569 tproto = IPPROTO_RAW;
/*
 * Attribute the skb's bytes to "uid" on the relevant device: prefers
 * skb->dev, cross-checking it against par->in/par->out for debugging,
 * and falls back to par->in/out when skb->dev is absent.  Ultimately
 * calls if_tag_stat_update() with the skb's socket (or alternate_sk)
 * and IFS_RX/IFS_TX depending on whether par->in is set.
 * NOTE(review): truncated view -- the if/else structure around
 * skb->dev and several closing braces are missing.
 */
1574 static void account_for_uid(const struct sk_buff *skb,
1575 const struct sock *alternate_sk, uid_t uid,
1576 struct xt_action_param *par)
1578 const struct net_device *el_dev;
1581 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1582 el_dev = par->in ? : par->out;
1584 const struct net_device *other_dev;
1586 other_dev = par->in ? : par->out;
1587 if (el_dev != other_dev) {
1588 MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1589 "par->(in/out)=%p %s\n",
1590 par->hooknum, el_dev, el_dev->name, other_dev,
1595 if (unlikely(!el_dev)) {
1596 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1597 } else if (unlikely(!el_dev->name)) {
1598 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1600 int proto = ipx_proto(skb, par);
1601 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1602 par->hooknum, el_dev->name, el_dev->type,
1603 par->family, proto);
1605 if_tag_stat_update(el_dev->name, uid,
1606 skb->sk ? skb->sk : alternate_sk,
1607 par->in ? IFS_RX : IFS_TX,
/*
 * The iptables match function.  Finds the socket/file/uid behind the
 * packet, does per-uid/tag accounting as a side effect (unless the rule
 * itself matches on uid), and evaluates the uid/gid range match with
 * the XT_QTAGUID_* invert flags.  Sockets obtained via
 * qtaguid_find_sk() are released at put_sock_ret_res.  In passive mode
 * it only returns the trivial match result.
 * NOTE(review): truncated view -- several braces, "res = false" style
 * assignments and the final return are missing.
 */
1612 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1614 const struct xt_qtaguid_match_info *info = par->matchinfo;
1615 const struct file *filp;
1616 bool got_sock = false;
1621 if (unlikely(module_passive))
1622 return (info->match ^ info->invert) == 0;
1624 MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1625 par->hooknum, skb, par->in, par->out, par->family);
1627 atomic64_inc(&qtu_events.match_calls);
1629 res = (info->match ^ info->invert) == 0;
1637 * A missing sk->sk_socket happens when packets are in-flight
1638 * and the matching socket is already closed and gone.
1640 sk = qtaguid_find_sk(skb, par);
1642 * If we got the socket from the find_sk(), we will need to put
1643 * it back, as nf_tproxy_get_sock_v4() got it.
1647 atomic64_inc(&qtu_events.match_found_sk_in_ct);
1649 atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1651 atomic64_inc(&qtu_events.match_found_sk);
1653 MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1654 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1656 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1657 par->hooknum, sk, sk->sk_socket,
1658 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1659 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1660 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1661 par->hooknum, filp ? filp->f_cred->fsuid : -1);
1664 if (sk == NULL || sk->sk_socket == NULL) {
1666 * Here, the qtaguid_find_sk() using connection tracking
1667 * couldn't find the owner, so for now we just count them
1668 * against the system.
1671 * TODO: unhack how to force just accounting.
1672 * For now we only do iface stats when the uid-owner is not
1675 if (!(info->match & XT_QTAGUID_UID))
1676 account_for_uid(skb, sk, 0, par);
1677 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1679 sk ? sk->sk_socket : NULL);
1680 res = (info->match ^ info->invert) == 0;
1681 atomic64_inc(&qtu_events.match_no_sk);
1682 goto put_sock_ret_res;
1683 } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1685 goto put_sock_ret_res;
1687 filp = sk->sk_socket->file;
/* Socket exists but has no file: account to uid 0 and match only on
 * the non-uid/gid criteria. */
1689 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1690 account_for_uid(skb, sk, 0, par);
1691 res = ((info->match ^ info->invert) &
1692 (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1693 atomic64_inc(&qtu_events.match_no_sk_file);
1694 goto put_sock_ret_res;
1696 sock_uid = filp->f_cred->fsuid;
1698 * TODO: unhack how to force just accounting.
1699 * For now we only do iface stats when the uid-owner is not requested
1701 if (!(info->match & XT_QTAGUID_UID))
1702 account_for_uid(skb, sk, sock_uid, par);
1705 * The following two tests fail the match when:
1706 * id not in range AND no inverted condition requested
1707 * or id in range AND inverted condition requested
1708 * Thus (!a && b) || (a && !b) == a ^ b
1710 if (info->match & XT_QTAGUID_UID)
1711 if ((filp->f_cred->fsuid >= info->uid_min &&
1712 filp->f_cred->fsuid <= info->uid_max) ^
1713 !(info->invert & XT_QTAGUID_UID)) {
1714 MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1717 goto put_sock_ret_res;
1719 if (info->match & XT_QTAGUID_GID)
1720 if ((filp->f_cred->fsgid >= info->gid_min &&
1721 filp->f_cred->fsgid <= info->gid_max) ^
1722 !(info->invert & XT_QTAGUID_GID)) {
1723 MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1726 goto put_sock_ret_res;
1729 MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
/* Release the socket reference taken by qtaguid_find_sk(), if any. */
1734 xt_socket_put_sk(sk);
1736 MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1741 /* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the module's full internal state (sock-tag tree, uid-tag data,
 * proc-qtu data, iface stat list) to the kernel log when DDEBUG_MASK
 * is set, taking each subsystem's lock around its dump.  The varargs
 * form a caption for the dump.  A second, empty definition (visible at
 * the end of this block) is presumably the non-debug build stub --
 * the #ifdef/#else lines are missing from this truncated view.
 */
1742 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1748 if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1751 fmt_buff = kasprintf(GFP_ATOMIC,
1752 "qtaguid: %s(): %s {\n", __func__, fmt);
1754 va_start(args, fmt);
1755 buff = kvasprintf(GFP_ATOMIC,
1758 pr_debug("%s", buff);
1763 spin_lock_bh(&sock_tag_list_lock);
1764 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1765 spin_unlock_bh(&sock_tag_list_lock);
1767 spin_lock_bh(&sock_tag_list_lock);
1768 spin_lock_bh(&uid_tag_data_tree_lock);
1769 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1770 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1771 spin_unlock_bh(&uid_tag_data_tree_lock);
1772 spin_unlock_bh(&sock_tag_list_lock);
1774 spin_lock_bh(&iface_stat_list_lock);
1775 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1776 spin_unlock_bh(&iface_stat_list_lock);
1778 pr_debug("qtaguid: %s(): }\n", __func__);
1781 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1785 * Procfs reader to get all active socket tags using style "1)" as described in
/*
 * Emits one "sock=... tag=... (uid=...) pid=... f_count=..." line per
 * entry in sock_tag_tree (under sock_tag_list_lock), then a summary
 * line of the qtu_events counters, honoring items_to_skip/char_count
 * paging.  In passive mode it returns early.  Finishes with an optional
 * full-state debug dump.  NOTE(review): truncated view -- variable
 * declarations, outp advancement and return statements are missing.
 */
1788 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1789 off_t items_to_skip, int char_count, int *eof,
1795 struct rb_node *node;
1796 struct sock_tag *sock_tag_entry;
1798 int indent_level = 0;
1801 if (unlikely(module_passive)) {
1809 CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1810 page, items_to_skip, char_count, *eof);
1812 spin_lock_bh(&sock_tag_list_lock);
1813 for (node = rb_first(&sock_tag_tree);
1815 node = rb_next(node)) {
1816 if (item_index++ < items_to_skip)
1818 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1819 uid = get_uid_from_tag(sock_tag_entry->tag);
1820 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1823 sock_tag_entry->tag,
1827 f_count = atomic_long_read(
1828 &sock_tag_entry->socket->file->f_count);
1829 len = snprintf(outp, char_count,
1830 "sock=%p tag=0x%llx (uid=%u) pid=%u "
1833 sock_tag_entry->tag, uid,
1834 sock_tag_entry->pid, f_count);
/* Line did not fit: stop here, caller will re-read with a new offset. */
1835 if (len >= char_count) {
1836 spin_unlock_bh(&sock_tag_list_lock);
1842 (*num_items_returned)++;
1844 spin_unlock_bh(&sock_tag_list_lock);
1846 if (item_index++ >= items_to_skip) {
1847 len = snprintf(outp, char_count,
1848 "events: sockets_tagged=%llu "
1849 "sockets_untagged=%llu "
1850 "counter_set_changes=%llu "
1852 "iface_events=%llu "
1854 "match_found_sk=%llu "
1855 "match_found_sk_in_ct=%llu "
1856 "match_found_no_sk_in_ct=%llu "
1858 "match_no_sk_file=%llu\n",
1859 atomic64_read(&qtu_events.sockets_tagged),
1860 atomic64_read(&qtu_events.sockets_untagged),
1861 atomic64_read(&qtu_events.counter_set_changes),
1862 atomic64_read(&qtu_events.delete_cmds),
1863 atomic64_read(&qtu_events.iface_events),
1864 atomic64_read(&qtu_events.match_calls),
1865 atomic64_read(&qtu_events.match_found_sk),
1866 atomic64_read(&qtu_events.match_found_sk_in_ct),
1868 &qtu_events.match_found_no_sk_in_ct),
1869 atomic64_read(&qtu_events.match_no_sk),
1870 atomic64_read(&qtu_events.match_no_sk_file));
1871 if (len >= char_count) {
1877 (*num_items_returned)++;
1880 /* Count the following as part of the last item_index */
1881 if (item_index > items_to_skip) {
1882 prdebug_full_state(indent_level, "proc ctrl");
1890 * Delete socket tags, and stat tags associated with a given
1891 * accouting tag and uid.
/*
 * Handles the 'd' control command: "d <acct_tag> [<uid>]".
 * With acct_tag == 0, everything belonging to the uid is removed.
 * Permission: deleting for another uid requires can_impersonate_uid();
 * with no uid argument it defaults to current_fsuid().
 * Walks four structures in turn, each under its own lock:
 *  1. sock_tag_tree   -- matching socket tags moved to a temp tree and
 *                        freed after the lock is dropped (sockfd_put
 *                        can't run under the spinlock);
 *  2. tag_counter_set_tree -- the uid's counter-set entry;
 *  3. every iface's tag_stat_tree -- matching per-iface stats;
 *  4. uid_tag_data_tree -- drops now-unreferenced tag_refs/utd entries.
 * NOTE(review): truncated view -- argc checks, braces and returns
 * are missing.
 */
1893 static int ctrl_cmd_delete(const char *input)
1901 struct iface_stat *iface_entry;
1902 struct rb_node *node;
1903 struct sock_tag *st_entry;
1904 struct rb_root st_to_free_tree = RB_ROOT;
1905 struct tag_stat *ts_entry;
1906 struct tag_counter_set *tcs_entry;
1907 struct tag_ref *tr_entry;
1908 struct uid_tag_data *utd_entry;
1910 argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1911 CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1912 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1918 if (!valid_atag(acct_tag)) {
1919 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1924 uid = current_fsuid();
1925 } else if (!can_impersonate_uid(uid)) {
1926 pr_info("qtaguid: ctrl_delete(%s): "
1927 "insufficient priv from pid=%u tgid=%u uid=%u\n",
1928 input, current->pid, current->tgid, current_fsuid());
1933 tag = combine_atag_with_uid(acct_tag, uid);
1934 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1935 "looking for tag=0x%llx (uid=%u)\n",
1938 /* Delete socket tags */
1939 spin_lock_bh(&sock_tag_list_lock);
1940 node = rb_first(&sock_tag_tree);
1942 st_entry = rb_entry(node, struct sock_tag, sock_node);
1943 entry_uid = get_uid_from_tag(st_entry->tag);
1944 node = rb_next(node);
1945 if (entry_uid != uid)
1948 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
1949 input, st_entry->tag, entry_uid);
1951 if (!acct_tag || st_entry->tag == tag) {
1952 rb_erase(&st_entry->sock_node, &sock_tag_tree);
1953 /* Can't sockfd_put() within spinlock, do it later. */
1954 sock_tag_tree_insert(st_entry, &st_to_free_tree);
1955 tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1956 BUG_ON(tr_entry->num_sock_tags <= 0);
1957 tr_entry->num_sock_tags--;
1959 * TODO: remove if, and start failing.
1960 * This is a hack to work around the fact that in some
1961 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
1962 * and are trying to work around apps
1963 * that didn't open the /dev/xt_qtaguid.
1965 if (st_entry->list.next && st_entry->list.prev)
1966 list_del(&st_entry->list);
1969 spin_unlock_bh(&sock_tag_list_lock);
1971 sock_tag_tree_erase(&st_to_free_tree);
1973 /* Delete tag counter-sets */
1974 spin_lock_bh(&tag_counter_set_list_lock);
1975 /* Counter sets are only on the uid tag, not full tag */
1976 tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1978 CT_DEBUG("qtaguid: ctrl_delete(%s): "
1979 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1982 get_uid_from_tag(tcs_entry->tn.tag),
1983 tcs_entry->active_set);
1984 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1987 spin_unlock_bh(&tag_counter_set_list_lock);
1990 * If acct_tag is 0, then all entries belonging to uid are
1993 spin_lock_bh(&iface_stat_list_lock);
1994 list_for_each_entry(iface_entry, &iface_stat_list, list) {
1995 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1996 node = rb_first(&iface_entry->tag_stat_tree);
1998 ts_entry = rb_entry(node, struct tag_stat, tn.node);
1999 entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2000 node = rb_next(node);
2002 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2003 "ts tag=0x%llx (uid=%u)\n",
2004 input, ts_entry->tn.tag, entry_uid);
2006 if (entry_uid != uid)
2008 if (!acct_tag || ts_entry->tn.tag == tag) {
2009 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2010 "erase ts: %s 0x%llx %u\n",
2011 input, iface_entry->ifname,
2012 get_atag_from_tag(ts_entry->tn.tag),
2014 rb_erase(&ts_entry->tn.node,
2015 &iface_entry->tag_stat_tree);
2019 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2021 spin_unlock_bh(&iface_stat_list_lock);
2023 /* Cleanup the uid_tag_data */
2024 spin_lock_bh(&uid_tag_data_tree_lock);
2025 node = rb_first(&uid_tag_data_tree);
2027 utd_entry = rb_entry(node, struct uid_tag_data, node);
2028 entry_uid = utd_entry->uid;
2029 node = rb_next(node);
2031 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2035 if (entry_uid != uid)
2038 * Go over the tag_refs, and those that don't have
2039 * sock_tags using them are freed.
2041 put_tag_ref_tree(tag, utd_entry);
2042 put_utd_entry(utd_entry);
2044 spin_unlock_bh(&uid_tag_data_tree_lock);
2046 atomic64_inc(&qtu_events.delete_cmds);
/*
 * Handles the 's' control command: "s <counter_set> <uid>".
 * Selects which of the IFS_MAX_COUNTER_SETS counter sets future traffic
 * for the uid is accounted into.  Requires can_manipulate_uids();
 * allocates the tag_counter_set entry on first use (GFP_ATOMIC, under
 * tag_counter_set_list_lock).  NOTE(review): truncated view -- argc
 * validation, some braces and return statements are missing.
 */
2053 static int ctrl_cmd_counter_set(const char *input)
2059 struct tag_counter_set *tcs;
2062 argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2063 CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2064 "set=%d uid=%u\n", input, argc, cmd,
2070 if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2071 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2076 if (!can_manipulate_uids()) {
2077 pr_info("qtaguid: ctrl_counterset(%s): "
2078 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2079 input, current->pid, current->tgid, current_fsuid());
2084 tag = make_tag_from_uid(uid);
2085 spin_lock_bh(&tag_counter_set_list_lock);
2086 tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2088 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2090 spin_unlock_bh(&tag_counter_set_list_lock);
2091 pr_err("qtaguid: ctrl_counterset(%s): "
2092 "failed to alloc counter set\n",
2098 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2099 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2100 "(uid=%u) set=%d\n",
2101 input, tag, get_uid_from_tag(tag), counter_set);
2103 tcs->active_set = counter_set;
2104 spin_unlock_bh(&tag_counter_set_list_lock);
2105 atomic64_inc(&qtu_events.counter_set_changes);
/*
 * Handles the 't' control command: "t <sock_fd> [<acct_tag> [<uid>]]".
 * Tags the socket behind sock_fd with {acct_tag, uid}.  Defaults:
 * acct_tag 0, uid current_fsuid(); tagging for another uid requires
 * can_impersonate_uid().  sockfd_lookup() pins the socket file; the
 * reference is kept until untag for a first tagging, while a re-tag
 * releases the reference taken at the earlier tagging.  The new
 * sock_tag is linked into the caller's proc_qtu_data (if the process
 * opened /dev/xt_qtaguid) and into sock_tag_tree.  On get_tag_ref()
 * failure the tag ref is unwound and the fd reference dropped.
 * NOTE(review): truncated view -- several braces, gotos, "else" lines
 * and return statements are missing.
 */
2112 static int ctrl_cmd_tag(const char *input)
2117 tag_t acct_tag = make_atag_from_value(0);
2119 struct socket *el_socket;
2121 struct sock_tag *sock_tag_entry;
2122 struct tag_ref *tag_ref_entry;
2123 struct uid_tag_data *uid_tag_data_entry;
2124 struct proc_qtu_data *pqd_entry;
2126 /* Unassigned args will get defaulted later. */
2127 argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2128 CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2129 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2135 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2137 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2138 " sock_fd=%d err=%d\n", input, sock_fd, res);
2141 CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2142 input, atomic_long_read(&el_socket->file->f_count),
2145 acct_tag = make_atag_from_value(0);
2146 } else if (!valid_atag(acct_tag)) {
2147 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2151 CT_DEBUG("qtaguid: ctrl_tag(%s): "
2152 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2153 "in_group=%d in_egroup=%d\n",
2154 input, current->pid, current->tgid, current_uid(),
2155 current_euid(), current_fsuid(),
2156 in_group_p(proc_ctrl_write_gid),
2157 in_egroup_p(proc_ctrl_write_gid));
2159 uid = current_fsuid();
2160 } else if (!can_impersonate_uid(uid)) {
2161 pr_info("qtaguid: ctrl_tag(%s): "
2162 "insufficient priv from pid=%u tgid=%u uid=%u\n",
2163 input, current->pid, current->tgid, current_fsuid());
2167 full_tag = combine_atag_with_uid(acct_tag, uid);
2169 spin_lock_bh(&sock_tag_list_lock);
2170 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2171 tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2172 if (IS_ERR(tag_ref_entry)) {
2173 res = PTR_ERR(tag_ref_entry);
2174 spin_unlock_bh(&sock_tag_list_lock);
2177 tag_ref_entry->num_sock_tags++;
2178 if (sock_tag_entry) {
/* Re-tag path: the socket already carries a tag. */
2179 struct tag_ref *prev_tag_ref_entry;
2181 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2182 "st@%p ...->f_count=%ld\n",
2183 input, el_socket->sk, sock_tag_entry,
2184 atomic_long_read(&el_socket->file->f_count));
2186 * This is a re-tagging, so release the sock_fd that was
2187 * locked at the time of the 1st tagging.
2188 * There is still the ref from this call's sockfd_lookup() so
2189 * it can be done within the spinlock.
2191 sockfd_put(sock_tag_entry->socket);
2192 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2193 &uid_tag_data_entry);
2194 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2195 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2196 prev_tag_ref_entry->num_sock_tags--;
2197 sock_tag_entry->tag = full_tag;
/* First-tag path (else branch; brace missing in view). */
2199 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2200 input, el_socket->sk);
2201 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2203 if (!sock_tag_entry) {
2204 pr_err("qtaguid: ctrl_tag(%s): "
2205 "socket tag alloc failed\n",
2207 spin_unlock_bh(&sock_tag_list_lock);
2209 goto err_tag_unref_put;
2211 sock_tag_entry->sk = el_socket->sk;
2212 sock_tag_entry->socket = el_socket;
2213 sock_tag_entry->pid = current->tgid;
2214 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2216 spin_lock_bh(&uid_tag_data_tree_lock);
2217 pqd_entry = proc_qtu_data_tree_search(
2218 &proc_qtu_data_tree, current->tgid);
2220 * TODO: remove if, and start failing.
2221 * At first, we want to catch user-space code that is not
2222 * opening the /dev/xt_qtaguid.
2224 if (IS_ERR_OR_NULL(pqd_entry))
2227 "User space forgot to open /dev/xt_qtaguid? "
2228 "pid=%u tgid=%u uid=%u\n", __func__,
2229 current->pid, current->tgid,
2232 list_add(&sock_tag_entry->list,
2233 &pqd_entry->sock_tag_list);
2234 spin_unlock_bh(&uid_tag_data_tree_lock);
2236 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2237 atomic64_inc(&qtu_events.sockets_tagged);
2239 spin_unlock_bh(&sock_tag_list_lock);
2240 /* We keep the ref to the socket (file) until it is untagged */
2241 CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2242 input, sock_tag_entry,
2243 atomic_long_read(&el_socket->file->f_count));
/* Error unwind: undo the tag_ref taken above, then drop the fd ref. */
2247 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2248 tag_ref_entry->num_sock_tags--;
2249 free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2251 CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2252 input, atomic_long_read(&el_socket->file->f_count) - 1);
2253 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2254 sockfd_put(el_socket);
2258 CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
/*
 * Handles the 'u' control command: "u <sock_fd>".
 * Removes the tag from the socket behind sock_fd: unlinks the
 * sock_tag from sock_tag_tree and from the owner's proc_qtu_data list,
 * decrements the tag_ref count (the tag_ref itself is only freed by
 * ctrl_cmd_delete()), then releases both the reference pinned at tag
 * time and the one taken by this call's sockfd_lookup(), and frees the
 * sock_tag.  No extra permission check: the fd lookup proves the caller
 * owns the socket.  NOTE(review): truncated view -- argc checks, some
 * braces and return statements are missing.
 */
2262 static int ctrl_cmd_untag(const char *input)
2266 struct socket *el_socket;
2268 struct sock_tag *sock_tag_entry;
2269 struct tag_ref *tag_ref_entry;
2270 struct uid_tag_data *utd_entry;
2271 struct proc_qtu_data *pqd_entry;
2273 argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2274 CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2275 input, argc, cmd, sock_fd);
2280 el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
2282 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2283 " sock_fd=%d err=%d\n", input, sock_fd, res);
2286 CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2287 input, atomic_long_read(&el_socket->file->f_count),
2289 spin_lock_bh(&sock_tag_list_lock);
2290 sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2291 if (!sock_tag_entry) {
2292 spin_unlock_bh(&sock_tag_list_lock);
2297 * The socket already belongs to the current process
2298 * so it can do whatever it wants to it.
2300 rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2302 tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2303 BUG_ON(!tag_ref_entry);
2304 BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2305 spin_lock_bh(&uid_tag_data_tree_lock);
2306 pqd_entry = proc_qtu_data_tree_search(
2307 &proc_qtu_data_tree, current->tgid);
2309 * TODO: remove if, and start failing.
2310 * At first, we want to catch user-space code that is not
2311 * opening the /dev/xt_qtaguid.
2313 if (IS_ERR_OR_NULL(pqd_entry))
2314 pr_warn_once("qtaguid: %s(): "
2315 "User space forgot to open /dev/xt_qtaguid? "
2316 "pid=%u tgid=%u uid=%u\n", __func__,
2317 current->pid, current->tgid, current_fsuid());
2319 list_del(&sock_tag_entry->list);
2320 spin_unlock_bh(&uid_tag_data_tree_lock);
2322 * We don't free tag_ref from the utd_entry here,
2323 * only during a cmd_delete().
2325 tag_ref_entry->num_sock_tags--;
2326 spin_unlock_bh(&sock_tag_list_lock);
2328 * Release the sock_fd that was grabbed at tag time,
2329 * and once more for the sockfd_lookup() here.
2331 sockfd_put(sock_tag_entry->socket);
2332 CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2333 input, sock_tag_entry,
2334 atomic_long_read(&el_socket->file->f_count) - 1);
2335 sockfd_put(el_socket);
2337 kfree(sock_tag_entry);
2338 atomic64_inc(&qtu_events.sockets_untagged);
/* Error path: drop only the reference taken by this lookup. */
2343 CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2344 input, atomic_long_read(&el_socket->file->f_count) - 1);
2345 /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2346 sockfd_put(el_socket);
2350 CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
/*
 * Dispatch a control-file command line on its first character:
 * 'd' delete, 's' counter-set, 't' tag, 'u' untag.
 * NOTE(review): truncated view -- the switch statement, case labels
 * and the final return are missing; only the per-command calls are
 * visible.
 */
2354 static int qtaguid_ctrl_parse(const char *input, int count)
2360 /* Collect params for commands */
2363 res = ctrl_cmd_delete(input);
2367 res = ctrl_cmd_counter_set(input);
2371 res = ctrl_cmd_tag(input);
2375 res = ctrl_cmd_untag(input);
2385 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2389 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
/*
 * proc write handler for the ctrl file: copies at most
 * MAX_QTAGUID_CTRL_INPUT_LEN-1 bytes from user space into a stack
 * buffer, NUL-terminates it and hands it to qtaguid_ctrl_parse().
 * Ignored in passive mode; oversized writes are rejected.
 */
2390 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2391 unsigned long count, void *data)
2393 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2395 if (unlikely(module_passive))
2398 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2401 if (copy_from_user(input_buf, buffer, count))
2404 input_buf[count] = '\0';
2405 return qtaguid_ctrl_parse(input_buf, count);
/* Cursor state shared by the stats proc reader helpers below
 * (qtaguid_stats_proc_read / pp_sets / pp_stats_line).
 * NOTE(review): several members (outp, char_count, item_index,
 * items_to_skip) are missing from this truncated view. */
2408 struct proc_print_info {
2410 char **num_items_returned;
2411 struct iface_stat *iface_entry;
2412 struct tag_stat *ts_entry;
/*
 * Format one stats line into ppi->outp.  With item_index still 0 it
 * emits the column-header line; otherwise it emits one data line for
 * the current ts_entry and counter set, after checking the caller may
 * read that uid's stats and honoring items_to_skip.  Returns the
 * snprintf length (caller checks it against char_count for paging).
 * NOTE(review): truncated view -- some returns, the data snprintf's
 * format continuation lines and a few arguments are missing.
 */
2418 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2421 struct data_counters *cnts;
2423 if (!ppi->item_index) {
2424 if (ppi->item_index++ < ppi->items_to_skip)
2426 len = snprintf(ppi->outp, ppi->char_count,
2427 "idx iface acct_tag_hex uid_tag_int cnt_set "
2428 "rx_bytes rx_packets "
2429 "tx_bytes tx_packets "
2430 "rx_tcp_bytes rx_tcp_packets "
2431 "rx_udp_bytes rx_udp_packets "
2432 "rx_other_bytes rx_other_packets "
2433 "tx_tcp_bytes tx_tcp_packets "
2434 "tx_udp_bytes tx_udp_packets "
2435 "tx_other_bytes tx_other_packets\n");
2437 tag_t tag = ppi->ts_entry->tn.tag;
2438 uid_t stat_uid = get_uid_from_tag(tag);
2440 if (!can_read_other_uid_stats(stat_uid)) {
2441 CT_DEBUG("qtaguid: stats line: "
2442 "%s 0x%llx %u: insufficient priv "
2443 "from pid=%u tgid=%u uid=%u\n",
2444 ppi->iface_entry->ifname,
2445 get_atag_from_tag(tag), stat_uid,
2446 current->pid, current->tgid, current_fsuid());
2449 if (ppi->item_index++ < ppi->items_to_skip)
2451 cnts = &ppi->ts_entry->counters;
2453 ppi->outp, ppi->char_count,
2454 "%d %s 0x%llx %u %u "
2464 ppi->iface_entry->ifname,
2465 get_atag_from_tag(tag),
2468 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2469 dc_sum_packets(cnts, cnt_set, IFS_RX),
2470 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2471 dc_sum_packets(cnts, cnt_set, IFS_TX),
2472 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2473 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2474 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2475 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2476 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2477 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2478 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2479 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2480 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2481 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2482 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2483 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
/*
 * Print one stats line per counter set for the current ts_entry,
 * advancing ppi's output cursor and item count.  Returns false (per
 * the visible out-of-space branch; the return lines are missing from
 * this truncated view) when a line no longer fits, so the caller can
 * stop and let procfs re-invoke with a new offset.
 */
2488 static bool pp_sets(struct proc_print_info *ppi)
2492 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2494 len = pp_stats_line(ppi, counter_set);
2495 if (len >= ppi->char_count) {
2501 ppi->char_count -= len;
2502 (*ppi->num_items_returned)++;
2509 * Procfs reader to get all tag stats using style "1)" as described in
2511 * Groups all protocols tx/rx bytes.
/*
 * proc read handler for the stats file: prints the header line, then
 * walks every iface_stat and each tag_stat in its rbtree (both under
 * their locks), emitting one line per counter set via pp_sets().  In
 * passive mode only the header is produced.  Returns the number of
 * bytes written to "page".  NOTE(review): truncated view -- ppi
 * initialization lines, outp advancement and the loop's inner
 * bookkeeping are partially missing.
 */
2513 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2514 off_t items_to_skip, int char_count, int *eof,
2517 struct proc_print_info ppi;
2522 ppi.char_count = char_count;
2523 ppi.num_items_returned = num_items_returned;
2524 ppi.items_to_skip = items_to_skip;
2526 if (unlikely(module_passive)) {
2527 len = pp_stats_line(&ppi, 0);
2528 /* The header should always be shorter than the buffer. */
2529 BUG_ON(len >= ppi.char_count);
2530 (*num_items_returned)++;
2535 CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
2536 "char_count=%d *eof=%d\n", page, *num_items_returned,
2537 items_to_skip, char_count, *eof);
2542 /* The idx is there to help debug when things go belly up. */
2543 len = pp_stats_line(&ppi, 0);
2544 /* Don't advance the outp unless the whole line was printed */
2545 if (len >= ppi.char_count) {
2547 return ppi.outp - page;
2551 ppi.char_count -= len;
2552 (*num_items_returned)++;
2555 spin_lock_bh(&iface_stat_list_lock);
2556 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2557 struct rb_node *node;
2558 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2559 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2561 node = rb_next(node)) {
2562 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2563 if (!pp_sets(&ppi)) {
2565 &ppi.iface_entry->tag_stat_list_lock);
2566 spin_unlock_bh(&iface_stat_list_lock);
2567 return ppi.outp - page;
2570 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2572 spin_unlock_bh(&iface_stat_list_lock);
2575 return ppi.outp - page;
2578 /*------------------------------------------*/
/*
 * open() handler for the qtaguid misc device.
 * Registers the opening process: looks up (or creates) the per-uid
 * uid_tag_data for current_fsuid(), refuses a second open from the same
 * tgid, and attaches a fresh proc_qtu_data to file->private_data so
 * qtudev_release() can clean up the process's socket tags later.
 */
static int qtudev_open(struct inode *inode, struct file *file)
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;
	struct proc_qtu_data *new_pqd_entry;
	bool utd_entry_found;
	/* In passive mode no per-process bookkeeping is done. */
	if (unlikely(qtu_proc_handling_passive))
	DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, current_fsuid());
	spin_lock_bh(&uid_tag_data_tree_lock);
	/* Look for existing uid data, or alloc one. */
	utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
	if (IS_ERR_OR_NULL(utd_entry)) {
		/*
		 * NOTE(review): PTR_ERR(NULL) evaluates to 0 — confirm
		 * get_uid_data() never returns plain NULL, or this path
		 * would report success.
		 */
		res = PTR_ERR(utd_entry);
	/* Look for existing PID based proc_data */
	pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
		pr_err("qtaguid: qtudev_open(): %u/%u %u "
		       "%s already opened\n",
		       current->pid, current->tgid, current_fsuid(),
		goto err_unlock_free_utd;
	/* GFP_ATOMIC: we are holding uid_tag_data_tree_lock (a spinlock). */
	new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
	if (!new_pqd_entry) {
		pr_err("qtaguid: qtudev_open(): %u/%u %u: "
		       "proc data alloc failed\n",
		       current->pid, current->tgid, current_fsuid());
		goto err_unlock_free_utd;
	/* Track by thread-group id, not individual thread pid. */
	new_pqd_entry->pid = current->tgid;
	INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
	new_pqd_entry->parent_tag_data = utd_entry;
	utd_entry->num_pqd++;
	proc_qtu_data_tree_insert(new_pqd_entry,
				  &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
		 current_fsuid(), new_pqd_entry);
	file->private_data = new_pqd_entry;
err_unlock_free_utd:
	/* Only unwind the utd_entry if this open created it. */
	if (!utd_entry_found) {
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
/*
 * release() handler for the qtaguid misc device.
 * Undoes the tagging owned by the closing process: erases each of its
 * remaining sock_tags, drops the matching tag refs, then releases the
 * per-process proc_qtu_data and the per-uid uid_tag_data (the latter
 * only if no tags/pqds still reference it). Sockets collected in
 * st_to_free_tree are put only after both spinlocks are dropped.
 */
static int qtudev_release(struct inode *inode, struct file *file)
	struct proc_qtu_data *pqd_entry = file->private_data;
	struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct list_head *entry, *next;
	/* Passive mode: nothing was tracked at open time, nothing to undo. */
	if (unlikely(qtu_proc_handling_passive))
	/*
	 * Do not trust the current->pid, it might just be a kworker cleaning
	 * up after a dead proc.
	 */
	DR_DEBUG("qtaguid: qtudev_release(): "
		 "pid=%u tgid=%u uid=%u "
		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
		 pqd_entry, pqd_entry->pid, utd_entry,
		 utd_entry->num_active_tags);
	/* Lock order: sock_tag_list_lock before uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	/* Untag every socket this process still has tagged. */
	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
		st_entry = list_entry(entry, struct sock_tag, list);
		DR_DEBUG("qtaguid: %s(): "
			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
			 st_entry, st_entry->sk,
			 current->pid, current->tgid,
			 pqd_entry->parent_tag_data->uid);
		/* The tag's uid may differ from the process uid: re-resolve. */
		utd_entry = uid_tag_data_tree_search(
			get_uid_from_tag(st_entry->tag));
		BUG_ON(IS_ERR_OR_NULL(utd_entry));
		DR_DEBUG("qtaguid: %s(): "
			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
			 st_entry->tag, utd_entry);
		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
		BUG_ON(tr->num_sock_tags <= 0);
		tr->num_sock_tags--;
		free_tag_ref_from_utd_entry(tr, utd_entry);
		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		list_del(&st_entry->list);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);
		/*
		 * Try to free the utd_entry if no other proc_qtu_data is
		 * using it (num_pqd is 0) and it doesn't have active tags
		 * (num_active_tags is 0).
		 */
		put_utd_entry(utd_entry);
	/* Drop this process's own tracking data and its ref on its uid data. */
	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
	pqd_entry->parent_tag_data->num_pqd--;
	put_utd_entry(pqd_entry->parent_tag_data);
	file->private_data = NULL;
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);
	/* Now that the spinlocks are dropped, release the sockets. */
	sock_tag_tree_erase(&st_to_free_tree);
	prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
			   current->pid, current->tgid);
2726 /*------------------------------------------*/
/* File operations for the qtaguid misc device: open/release only. */
static const struct file_operations qtudev_fops = {
	.owner = THIS_MODULE,
	.open = qtudev_open,
	.release = qtudev_release,
/* Misc char device (/dev/<QTU_DEV_NAME>) used to track tagging processes. */
static struct miscdevice qtu_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = QTU_DEV_NAME,
	.fops = &qtudev_fops,
	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2740 /*------------------------------------------*/
/*
 * Creates the /proc/net/xt_qtaguid directory with its "ctrl" and "stats"
 * entries and wires up their read/write handlers.
 * On failure, removes whatever was already created (goto unwind) and
 * returns an error; on success stores the directory in *res_procdir.
 */
static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
	*res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
	if (!*res_procdir) {
		pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
	/* "ctrl" entry: tag/untag commands and listing, perms module param. */
	xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
	if (!xt_qtaguid_ctrl_file) {
		pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
	xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
	xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
	/* "stats" entry: read-only per-tag counter dump. */
	xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
	if (!xt_qtaguid_stats_file) {
		pr_err("qtaguid: failed to create xt_qtaguid/stats "
		goto no_stats_entry;
	xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
	/*
	 * TODO: add support counter hacking
	 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
	 */
	remove_proc_entry("ctrl", *res_procdir);
	remove_proc_entry("xt_qtaguid", NULL);
/* xtables match registration; NFPROTO_UNSPEC covers both IPv4 and IPv6. */
static struct xt_match qtaguid_mt_reg __read_mostly = {
	/*
	 * This module masquerades as the "owner" module so that iptables
	 * tools can deal with it.
	 */
	.family = NFPROTO_UNSPEC,
	.match = qtaguid_mt,
	.matchsize = sizeof(struct xt_qtaguid_match_info),
/*
 * Module init: registers the procfs tree, interface-stat tracking, the
 * xtables match, and the qtu misc device. Any failure aborts init.
 * NOTE(review): partially-completed registrations are not unwound here —
 * acceptable only because the module cannot be unloaded (see TODO below).
 */
static int __init qtaguid_mt_init(void)
	if (qtaguid_proc_register(&xt_qtaguid_procdir)
	    || iface_stat_init(xt_qtaguid_procdir)
	    || xt_register_match(&qtaguid_mt_reg)
	    || misc_register(&qtu_device))
2809 * TODO: allow unloading of the module.
2810 * For now stats are permanent.
 * Kconfig forces 'y/n' and never an 'm'.
/* Module entry point and metadata; the aliases let iptables userspace
 * load this module under the "owner"/"qtaguid" match names. */
module_init(qtaguid_mt_init);
MODULE_AUTHOR("jpa <jpa@google.com>");
MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_owner");
MODULE_ALIAS("ip6t_owner");
MODULE_ALIAS("ipt_qtaguid");
MODULE_ALIAS("ip6t_qtaguid");