2 * Kernel iptables module to track stats for packets based on user tags.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
12 * There are run-time debug flags enabled via the debug_mask module param, or
13 * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
29 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
30 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #include <linux/netfilter/xt_socket.h>
34 #include "xt_qtaguid_internal.h"
35 #include "xt_qtaguid_print.h"
38 * We only use the xt_socket funcs within a similar context to avoid unexpected
41 #define XT_SOCKET_SUPPORTED_HOOKS \
42 ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
/*
 * Module parameters and procfs handles.
 * NOTE(review): the embedded numbering is non-contiguous — some original
 * lines appear to be elided from this listing.
 */
45 static const char *module_procdirname = "xt_qtaguid";
46 static struct proc_dir_entry *xt_qtaguid_procdir;
/* Permissions for the per-interface proc entries; tunable via module param. */
48 static unsigned int proc_iface_perms = S_IRUGO;
49 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
51 static struct proc_dir_entry *xt_qtaguid_stats_file;
52 static unsigned int proc_stats_perms = S_IRUGO;
53 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
55 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
57 /* Everybody can write. But proc_ctrl_write_limited is true by default which
58 * limits what can be controlled. See the can_*() functions.
60 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
61 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
63 /* Limited by default, so the gid of the ctrl and stats proc entries
64 * will limit what can be done. See the can_*() functions.
66 static bool proc_stats_readall_limited = true;
67 static bool proc_ctrl_write_limited = true;
69 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
71 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
75 * Limit the number of active tags (via socket tags) for a given UID.
76 * Multiple processes could share the UID.
78 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
79 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
82 * After the kernel has initialized this module, it is still possible
84 * Setting passive to Y:
85 * - the iface stats handling will not act on notifications.
86 * - iptables matches will never match.
87 * - ctrl commands silently succeed.
88 * - stats are always empty.
89 * This is mostly useful when a bug is suspected.
91 static bool module_passive;
92 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
95 * Control how qtaguid data is tracked per proc/uid.
96 * Setting tag_tracking_passive to Y:
97 * - don't create proc specific structs to track tags
98 * - don't check that active tag stats exceed some limits.
99 * - don't clean up socket tags on process exits.
100 * This is mostly useful when a bug is suspected.
102 static bool qtu_proc_handling_passive;
103 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
106 #define QTU_DEV_NAME "xt_qtaguid"
108 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
109 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
111 /*---------------------------------------------------------------------------*/
/* proc directory/file names for per-interface stats. */
112 static const char *iface_stat_procdirname = "iface_stat";
113 static struct proc_dir_entry *iface_stat_procdir;
115 * The iface_stat_all* will go away once userspace gets used to the new fields
116 * that have a format line.
118 static const char *iface_stat_all_procfilename = "iface_stat_all";
119 static struct proc_dir_entry *iface_stat_all_procfile;
120 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
121 static struct proc_dir_entry *iface_stat_fmt_procfile;
127 * iface_stat_list_lock
130 * uid_tag_data_tree_lock
131 * tag_counter_set_list_lock
132 * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
135 * Call tree with all lock holders as of 2012-04-27:
137 * iface_stat_fmt_proc_read()
138 * iface_stat_list_lock
139 * (struct iface_stat)
141 * qtaguid_ctrl_proc_read()
144 * (struct proc_qtu_data->sock_tag_list)
145 * prdebug_full_state()
148 * uid_tag_data_tree_lock
149 * (uid_tag_data_tree)
150 * (proc_qtu_data_tree)
151 * iface_stat_list_lock
153 * qtaguid_stats_proc_read()
154 * iface_stat_list_lock
155 * struct iface_stat->tag_stat_list_lock
158 * uid_tag_data_tree_lock
161 * sock_tag_data_list_lock
162 * uid_tag_data_tree_lock
163 * prdebug_full_state()
165 * uid_tag_data_tree_lock
166 * iface_stat_list_lock
168 * iface_netdev_event_handler()
169 * iface_stat_create()
170 * iface_stat_list_lock
171 * iface_stat_update()
172 * iface_stat_list_lock
174 * iface_inetaddr_event_handler()
175 * iface_stat_create()
176 * iface_stat_list_lock
177 * iface_stat_update()
178 * iface_stat_list_lock
180 * iface_inet6addr_event_handler()
181 * iface_stat_create_ipv6()
182 * iface_stat_list_lock
183 * iface_stat_update()
184 * iface_stat_list_lock
188 * if_tag_stat_update()
191 * struct iface_stat->tag_stat_list_lock
193 * get_active_counter_set()
194 * tag_counter_set_list_lock
196 * get_active_counter_set()
197 * tag_counter_set_list_lock
200 * qtaguid_ctrl_parse()
203 * tag_counter_set_list_lock
204 * iface_stat_list_lock
205 * struct iface_stat->tag_stat_list_lock
206 * uid_tag_data_tree_lock
207 * ctrl_cmd_counter_set()
208 * tag_counter_set_list_lock
213 * uid_tag_data_tree_lock
214 * (uid_tag_data_tree)
215 * uid_tag_data_tree_lock
216 * (proc_qtu_data_tree)
219 * uid_tag_data_tree_lock
/*
 * Global tracking state. Each tree/list is guarded by the spinlock declared
 * next to it; see the lock-ordering comment earlier in the file.
 */
222 static LIST_HEAD(iface_stat_list);
223 static DEFINE_SPINLOCK(iface_stat_list_lock);
/* Sockets currently tagged, keyed by struct sock pointer. */
225 static struct rb_root sock_tag_tree = RB_ROOT;
226 static DEFINE_SPINLOCK(sock_tag_list_lock);
228 static struct rb_root tag_counter_set_tree = RB_ROOT;
229 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
/* Per-UID tag data, keyed by uid. */
231 static struct rb_root uid_tag_data_tree = RB_ROOT;
232 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
234 static struct rb_root proc_qtu_data_tree = RB_ROOT;
235 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
237 static struct qtaguid_event_counts qtu_events;
238 /*----------------------------------------------*/
/*
 * Permission helpers: a caller may act when it is in the proc file's group,
 * is root (fsuid 0), the limited mode is disabled, or it owns the ctrl file.
 */
239 static bool can_manipulate_uids(void)
242 	return in_egroup_p(xt_qtaguid_ctrl_file->gid)
243 		|| unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
244 		|| unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
/* A caller may always tag sockets for its own uid. */
247 static bool can_impersonate_uid(uid_t uid)
249 	return uid == current_fsuid() || can_manipulate_uids();
252 static bool can_read_other_uid_stats(uid_t uid)
255 	return in_egroup_p(xt_qtaguid_stats_file->gid)
256 		|| unlikely(!current_fsuid()) || uid == current_fsuid()
257 		|| unlikely(!proc_stats_readall_limited)
/* NOTE(review): compares against the ctrl file's uid, not the stats
 * file's — verify this asymmetry is intentional. */
258 		|| unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
/*
 * Accumulate bytes/packets into the (set, direction, protocol) counter cell.
 */
261 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
262 			       enum ifs_tx_rx direction,
263 			       enum ifs_proto ifs_proto,
267 	counters->bpc[set][direction][ifs_proto].bytes += bytes;
268 	counters->bpc[set][direction][ifs_proto].packets += packets;
/*
 * Standard rbtree lookup keyed by tag_compare(). Returns the matching
 * tag_node, or (per the usual pattern) NULL — the loop/return lines are
 * elided in this listing.
 */
271 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
273 	struct rb_node *node = root->rb_node;
276 		struct tag_node *data = rb_entry(node, struct tag_node, node);
278 		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
279 			 " node=%p data=%p\n", tag, node, data);
280 		result = tag_compare(tag, data->tag);
281 		RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
282 			 " data.tag=0x%llx (uid=%u) res=%d\n",
283 			 tag, data->tag, get_uid_from_tag(data->tag), result);
285 			node = node->rb_left;
287 			node = node->rb_right;
/*
 * Standard rbtree insert: walk to a leaf by tag_compare(), then link and
 * rebalance. Assumes the caller holds the appropriate lock.
 */
294 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
296 	struct rb_node **new = &(root->rb_node), *parent = NULL;
298 	/* Figure out where to put new node */
300 		struct tag_node *this = rb_entry(*new, struct tag_node,
302 		int result = tag_compare(data->tag, this->tag);
303 		RB_DEBUG("qtaguid: %s(): tag=0x%llx"
304 			 " (uid=%u)\n", __func__,
306 			 get_uid_from_tag(this->tag));
309 			new = &((*new)->rb_left);
311 			new = &((*new)->rb_right);
316 	/* Add new node and rebalance tree. */
317 	rb_link_node(&data->node, parent, new);
318 	rb_insert_color(&data->node, root);
/*
 * Thin typed wrappers around the generic tag_node tree ops: each embeds a
 * struct tag_node (field tn) as the tree key and converts back with rb_entry.
 */
321 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
323 	tag_node_tree_insert(&data->tn, root);
326 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
328 	struct tag_node *node = tag_node_tree_search(root, tag);
331 	return rb_entry(&node->node, struct tag_stat, tn.node);
334 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
335 					struct rb_root *root)
337 	tag_node_tree_insert(&data->tn, root);
340 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
343 	struct tag_node *node = tag_node_tree_search(root, tag);
346 	return rb_entry(&node->node, struct tag_counter_set, tn.node);
350 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
352 	tag_node_tree_insert(&data->tn, root);
355 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
357 	struct tag_node *node = tag_node_tree_search(root, tag);
360 	return rb_entry(&node->node, struct tag_ref, tn.node);
/*
 * sock_tag tree: keyed by raw struct sock pointer comparison.
 */
363 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
364 					     const struct sock *sk)
366 	struct rb_node *node = root->rb_node;
369 		struct sock_tag *data = rb_entry(node, struct sock_tag,
372 			node = node->rb_left;
373 		else if (sk > data->sk)
374 			node = node->rb_right;
/*
 * Insert a sock_tag, ordered by sock pointer; caller must hold
 * sock_tag_list_lock (per the lock-ordering comment earlier in the file).
 */
381 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
383 	struct rb_node **new = &(root->rb_node), *parent = NULL;
385 	/* Figure out where to put new node */
387 		struct sock_tag *this = rb_entry(*new, struct sock_tag,
390 		if (data->sk < this->sk)
391 			new = &((*new)->rb_left);
392 		else if (data->sk > this->sk)
393 			new = &((*new)->rb_right);
398 	/* Add new node and rebalance tree. */
399 	rb_link_node(&data->sock_node, parent, new);
400 	rb_insert_color(&data->sock_node, root);
/*
 * Drain a tree of sock_tags: for each entry, unlink it and drop the socket
 * reference taken when it was tagged (sockfd_put). The freeing of the entry
 * itself is on an elided line in this listing.
 */
403 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
405 	struct rb_node *node;
406 	struct sock_tag *st_entry;
408 	node = rb_first(st_to_free_tree);
410 		st_entry = rb_entry(node, struct sock_tag, sock_node);
/* Advance before erasing so iteration stays valid. */
411 		node = rb_next(node);
412 		CT_DEBUG("qtaguid: %s(): "
413 			 "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
416 			 get_uid_from_tag(st_entry->tag));
417 		rb_erase(&st_entry->sock_node, st_to_free_tree);
418 		sockfd_put(st_entry->socket);
/*
 * proc_qtu_data tree: keyed by pid. Guarded by uid_tag_data_tree_lock
 * (see the note next to proc_qtu_data_tree).
 */
423 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
426 	struct rb_node *node = root->rb_node;
429 		struct proc_qtu_data *data = rb_entry(node,
430 						      struct proc_qtu_data,
433 			node = node->rb_left;
434 		else if (pid > data->pid)
435 			node = node->rb_right;
/* Insert keyed by pid; same locking requirement as the search. */
442 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
443 				      struct rb_root *root)
445 	struct rb_node **new = &(root->rb_node), *parent = NULL;
447 	/* Figure out where to put new node */
449 		struct proc_qtu_data *this = rb_entry(*new,
450 						      struct proc_qtu_data,
453 		if (data->pid < this->pid)
454 			new = &((*new)->rb_left);
455 		else if (data->pid > this->pid)
456 			new = &((*new)->rb_right);
461 	/* Add new node and rebalance tree. */
462 	rb_link_node(&data->node, parent, new);
463 	rb_insert_color(&data->node, root);
/*
 * uid_tag_data tree: keyed by uid. Caller must hold uid_tag_data_tree_lock.
 */
466 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
467 				     struct rb_root *root)
469 	struct rb_node **new = &(root->rb_node), *parent = NULL;
471 	/* Figure out where to put new node */
473 		struct uid_tag_data *this = rb_entry(*new,
477 		if (data->uid < this->uid)
478 			new = &((*new)->rb_left);
479 		else if (data->uid > this->uid)
480 			new = &((*new)->rb_right);
485 	/* Add new node and rebalance tree. */
486 	rb_link_node(&data->node, parent, new);
487 	rb_insert_color(&data->node, root);
/* Lookup by uid; returns NULL on miss (return lines elided here). */
490 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
493 	struct rb_node *node = root->rb_node;
496 		struct uid_tag_data *data = rb_entry(node,
500 			node = node->rb_left;
501 		else if (uid > data->uid)
502 			node = node->rb_right;
510 * Allocates a new uid_tag_data struct if needed.
511 * Returns a pointer to the found or allocated uid_tag_data.
512 * Returns a PTR_ERR on failures, and lock is not held.
513 * If found is not NULL:
514 *   sets *found to true if not allocated.
515 *   sets *found to false if allocated.
/* NOTE(review): GFP_ATOMIC suggests this runs under a spinlock
 * (uid_tag_data_tree_lock per the header comment) — confirm at callers. */
517 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
519 	struct uid_tag_data *utd_entry;
521 	/* Look for top level uid_tag_data for the UID */
522 	utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
523 	DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
/* Pointer implicitly converted to bool: true when an entry was found. */
526 		*found_res = utd_entry;
530 	utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
532 		pr_err("qtaguid: get_uid_data(%u): "
533 		       "tag data alloc failed\n", uid);
534 		return ERR_PTR(-ENOMEM);
537 	utd_entry->uid = uid;
538 	utd_entry->tag_ref_tree = RB_ROOT;
539 	uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
540 	DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
544 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
/*
 * Allocate and insert a tag_ref under the given uid_tag_data, enforcing the
 * per-UID max_sock_tags quota. Error-return lines are elided in this listing.
 */
545 static struct tag_ref *new_tag_ref(tag_t new_tag,
546 				   struct uid_tag_data *utd_entry)
548 	struct tag_ref *tr_entry;
/* Quota check before allocating: one more tag must not exceed the cap. */
551 	if (utd_entry->num_active_tags + 1 > max_sock_tags) {
552 		pr_info("qtaguid: new_tag_ref(0x%llx): "
553 			"tag ref alloc quota exceeded. max=%d\n",
554 			new_tag, max_sock_tags);
560 	tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
562 		pr_err("qtaguid: new_tag_ref(0x%llx): "
563 		       "tag ref alloc failed\n",
568 	tr_entry->tn.tag = new_tag;
569 	/* tr_entry->num_sock_tags handled by caller */
570 	utd_entry->num_active_tags++;
571 	tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
572 	DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
573 		 " inserted new tag ref %p\n",
/*
 * Find the tag_ref for full_tag under its owning uid's data.
 * On success also returns the uid_tag_data via *utd_res.
 */
581 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
582 				      struct uid_tag_data **utd_res)
584 	struct uid_tag_data *utd_entry;
585 	struct tag_ref *tr_entry;
/* The uid is encoded in the tag itself. */
587 	uid_t uid = get_uid_from_tag(full_tag);
589 	DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
592 	utd_entry = get_uid_data(uid, &found_utd);
593 	if (IS_ERR_OR_NULL(utd_entry)) {
/* Propagate the PTR_ERR to the caller through *utd_res. */
595 			*utd_res = utd_entry;
599 	tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
601 		*utd_res = utd_entry;
602 	DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
603 		 full_tag, utd_entry, tr_entry);
607 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
/*
 * Find-or-create wrapper: looks up the tag_ref under uid_tag_data_tree_lock
 * and allocates one via new_tag_ref() when missing.
 */
608 static struct tag_ref *get_tag_ref(tag_t full_tag,
609 				   struct uid_tag_data **utd_res)
611 	struct uid_tag_data *utd_entry;
612 	struct tag_ref *tr_entry;
614 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
616 	spin_lock_bh(&uid_tag_data_tree_lock);
617 	tr_entry = lookup_tag_ref(full_tag, &utd_entry);
/* lookup_tag_ref() always yields a utd_entry or a PTR_ERR, never NULL. */
618 	BUG_ON(IS_ERR_OR_NULL(utd_entry));
620 		tr_entry = new_tag_ref(full_tag, utd_entry);
622 	spin_unlock_bh(&uid_tag_data_tree_lock);
624 	*utd_res = utd_entry;
625 	DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
626 		 full_tag, utd_entry, tr_entry);
630 /* Checks and maybe frees the UID Tag Data entry */
/*
 * Drop a uid_tag_data entry when it has no tag refs and no proc_qtu_data
 * users left; otherwise just log why it is kept.
 */
631 static void put_utd_entry(struct uid_tag_data *utd_entry)
633 	/* Are we done with the UID tag data entry? */
634 	if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
635 		!utd_entry->num_pqd) {
636 		DR_DEBUG("qtaguid: %s(): "
637 			 "erase utd_entry=%p uid=%u "
638 			 "by pid=%u tgid=%u uid=%u\n", __func__,
639 			 utd_entry, utd_entry->uid,
640 			 current->pid, current->tgid, current_fsuid());
/* An empty tag_ref_tree must mean zero active tags. */
641 		BUG_ON(utd_entry->num_active_tags);
642 		rb_erase(&utd_entry->node, &uid_tag_data_tree);
645 		DR_DEBUG("qtaguid: %s(): "
646 			 "utd_entry=%p still has %d tags %d proc_qtu_data\n",
647 			 __func__, utd_entry, utd_entry->num_active_tags,
/* Sanity: if we kept it, at least one of the counts must be non-zero. */
649 		BUG_ON(!(utd_entry->num_active_tags ||
650 			 utd_entry->num_pqd));
655 * If no sock_tags are using this tag_ref,
656 * decrements refcount of utd_entry, removes tr_entry
657 * from utd_entry->tag_ref_tree and frees.
659 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
660 					struct uid_tag_data *utd_entry)
662 	DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
663 		 tr_entry, tr_entry->tn.tag,
664 		 get_uid_from_tag(tr_entry->tn.tag));
/* Only release when no socket still references this tag. */
665 	if (!tr_entry->num_sock_tags) {
666 		BUG_ON(!utd_entry->num_active_tags);
667 		utd_entry->num_active_tags--;
668 		rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
669 		DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
/*
 * Walk the uid's tag_ref tree and release refs: with acct_tag == 0 every
 * ref is released; otherwise only the exact full_tag match.
 */
674 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
676 	struct rb_node *node;
677 	struct tag_ref *tr_entry;
680 	DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
681 		 full_tag, get_uid_from_tag(full_tag));
682 	acct_tag = get_atag_from_tag(full_tag);
683 	node = rb_first(&utd_entry->tag_ref_tree);
685 		tr_entry = rb_entry(node, struct tag_ref, tn.node);
/* Advance before a potential erase to keep iteration valid. */
686 		node = rb_next(node);
687 		if (!acct_tag || tr_entry->tn.tag == full_tag)
688 			free_tag_ref_from_utd_entry(tr_entry, utd_entry);
/*
 * Legacy procfs read callbacks (pre-seq_file API): print a single u64 or
 * bool value pointed to by 'data' into the proc page.
 */
692 static int read_proc_u64(char *page, char **start, off_t off,
693 			 int count, int *eof, void *data)
698 	uint64_t *iface_entry = data;
703 	value = *iface_entry;
704 	p += sprintf(p, "%llu\n", value);
705 	len = (p - page) - off;
/* EOF once the remaining output fits in the caller's buffer. */
706 	*eof = (len <= count) ? 1 : 0;
/* Same pattern for a bool-backed value. */
711 static int read_proc_bool(char *page, char **start, off_t off,
712 			  int count, int *eof, void *data)
717 	bool *bool_entry = data;
723 	p += sprintf(p, "%u\n", value);
724 	len = (p - page) - off;
725 	*eof = (len <= count) ? 1 : 0;
/*
 * Return the active counter set for the UID owning the tag (acct part is
 * stripped); defaults when no tag_counter_set exists (elided line).
 */
730 static int get_active_counter_set(tag_t tag)
733 	struct tag_counter_set *tcs;
735 	MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
737 		 tag, get_uid_from_tag(tag));
738 	/* For now we only handle UID tags for active sets */
739 	tag = get_utag_from_tag(tag);
740 	spin_lock_bh(&tag_counter_set_list_lock);
741 	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
743 		active_set = tcs->active_set;
744 	spin_unlock_bh(&tag_counter_set_list_lock);
749 * Find the entry for tracking the specified interface.
750 * Caller must hold iface_stat_list_lock
752 static struct iface_stat *get_iface_entry(const char *ifname)
754 	struct iface_stat *iface_entry;
756 	/* Find the entry for tracking the specified tag within the interface */
757 	if (ifname == NULL) {
758 		pr_info("qtaguid: iface_stat: get() NULL device name\n");
762 	/* Iterate over interfaces */
763 	list_for_each_entry(iface_entry, &iface_stat_list, list) {
/* Match by interface name; entries are never deleted (see worker). */
764 		if (!strcmp(ifname, iface_entry->ifname))
772 /* This is for fmt2 only */
/*
 * Pretty-print one iface_stat line for the fmt=2 proc file: either the
 * header row (header==true, iface_entry unused) or one data row from the
 * skb-derived totals. Returns the snprintf length.
 */
773 static int pp_iface_stat_line(bool header, char *outp,
774 			      int char_count, struct iface_stat *iface_entry)
778 		len = snprintf(outp, char_count,
780 			       "total_skb_rx_bytes total_skb_rx_packets "
781 			       "total_skb_tx_bytes total_skb_tx_packets "
782 			       "rx_tcp_bytes rx_tcp_packets "
783 			       "rx_udp_bytes rx_udp_packets "
784 			       "rx_other_bytes rx_other_packets "
785 			       "tx_tcp_bytes tx_tcp_packets "
786 			       "tx_udp_bytes tx_udp_packets "
787 			       "tx_other_bytes tx_other_packets\n"
790 		struct data_counters *cnts;
791 		int cnt_set = 0;   /* We only use one set for the device */
792 		cnts = &iface_entry->totals_via_skb;
796 			       "%llu %llu %llu %llu %llu %llu %llu %llu "
797 			       "%llu %llu %llu %llu %llu %llu %llu %llu\n",
/* Totals first (summed over protocols), then the per-proto breakdown. */
799 			       dc_sum_bytes(cnts, cnt_set, IFS_RX),
800 			       dc_sum_packets(cnts, cnt_set, IFS_RX),
801 			       dc_sum_bytes(cnts, cnt_set, IFS_TX),
802 			       dc_sum_packets(cnts, cnt_set, IFS_TX),
803 			       cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
804 			       cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
805 			       cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
806 			       cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
807 			       cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
808 			       cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
809 			       cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
810 			       cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
811 			       cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
812 			       cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
813 			       cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
814 			       cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
/*
 * Proc read for iface_stat_all (fmt=1) and iface_stat_fmt (fmt=2).
 * Uses the legacy proc read API with manual item skipping/pagination.
 */
819 static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
820 				    off_t items_to_skip, int char_count,
821 				    int *eof, void *data)
826 	int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
827 	struct iface_stat *iface_entry;
828 	struct rtnl_link_stats64 dev_stats, *stats;
/* Used when the device is inactive and has no live dev stats. */
829 	struct rtnl_link_stats64 no_dev_stats = {0};
/* In passive mode, report nothing (stats always empty by design). */
831 	if (unlikely(module_passive)) {
836 	CT_DEBUG("qtaguid:proc iface_stat_fmt "
837 		 "pid=%u tgid=%u uid=%u "
838 		 "page=%p *num_items_returned=%p off=%ld "
839 		 "char_count=%d *eof=%d\n",
840 		 current->pid, current->tgid, current_fsuid(),
841 		 page, *num_items_returned,
842 		 items_to_skip, char_count, *eof);
/* fmt 2 emits a header line as item 0. */
847 	if (fmt == 2 && item_index++ >= items_to_skip) {
848 		len = pp_iface_stat_line(true, outp, char_count, NULL);
849 		if (len >= char_count) {
855 		(*num_items_returned)++;
859 	 * This lock will prevent iface_stat_update() from changing active,
860 	 * and in turn prevent an interface from unregistering itself.
862 	spin_lock_bh(&iface_stat_list_lock);
863 	list_for_each_entry(iface_entry, &iface_stat_list, list) {
864 		if (item_index++ < items_to_skip)
867 		if (iface_entry->active) {
868 			stats = dev_get_stats(iface_entry->net_dev,
871 			stats = &no_dev_stats;
874 		 * If the meaning of the data changes, then update the fmtX
881 				       "%llu %llu %llu %llu "
882 				       "%llu %llu %llu %llu\n",
885 				       iface_entry->totals_via_dev[IFS_RX].bytes,
886 				       iface_entry->totals_via_dev[IFS_RX].packets,
887 				       iface_entry->totals_via_dev[IFS_TX].bytes,
888 				       iface_entry->totals_via_dev[IFS_TX].packets,
889 				       stats->rx_bytes, stats->rx_packets,
890 				       stats->tx_bytes, stats->tx_packets
893 			len = pp_iface_stat_line(false, outp, char_count,
/* Output buffer full: unlock and stop (truncation path). */
896 		if (len >= char_count) {
897 			spin_unlock_bh(&iface_stat_list_lock);
903 		(*num_items_returned)++;
905 	spin_unlock_bh(&iface_stat_list_lock);
/*
 * Workqueue handler: create the per-interface proc dir and read-only stat
 * entries. Runs in process context because proc creation can sleep while
 * the notifier callbacks that schedule it are atomic (see iface_alloc()).
 */
911 static void iface_create_proc_worker(struct work_struct *work)
913 	struct proc_dir_entry *proc_entry;
914 	struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
916 	struct iface_stat *new_iface = isw->iface_entry;
918 	/* iface_entries are not deleted, so safe to manipulate. */
919 	proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
920 	if (IS_ERR_OR_NULL(proc_entry)) {
921 		pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
926 	new_iface->proc_ptr = proc_entry;
/* Each entry points read_proc_u64/read_proc_bool at the live counter. */
928 	create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
930 			       &new_iface->totals_via_dev[IFS_TX].bytes);
931 	create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
933 			       &new_iface->totals_via_dev[IFS_RX].bytes);
934 	create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
936 			       &new_iface->totals_via_dev[IFS_TX].packets);
937 	create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
939 			       &new_iface->totals_via_dev[IFS_RX].packets);
940 	create_proc_read_entry("active", proc_iface_perms, proc_entry,
941 			       read_proc_bool, &new_iface->active);
943 	IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
944 		 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
949 * Will set the entry's active state, and
950 * update the net_dev accordingly also.
/* NOTE(review): reads *net_dev->pcpu_refcnt on the deactivate path too,
 * after entry->net_dev is cleared — relies on the caller's net_dev arg
 * still being valid; confirm against callers. */
952 static void _iface_stat_set_active(struct iface_stat *entry,
953 				   struct net_device *net_dev,
957 		entry->net_dev = net_dev;
958 		entry->active = true;
959 		IF_DEBUG("qtaguid: %s(%s): "
960 			 "enable tracking. rfcnt=%d\n", __func__,
962 			 __this_cpu_read(*net_dev->pcpu_refcnt));
964 		entry->active = false;
965 		entry->net_dev = NULL;
966 		IF_DEBUG("qtaguid: %s(%s): "
967 			 "disable tracking. rfcnt=%d\n", __func__,
969 			 __this_cpu_read(*net_dev->pcpu_refcnt));
974 /* Caller must hold iface_stat_list_lock */
/*
 * Allocate and register a new iface_stat entry (GFP_ATOMIC: called under
 * the list spinlock / atomic notifier context). Proc entry creation is
 * deferred to a workqueue because it can sleep.
 */
975 static struct iface_stat *iface_alloc(struct net_device *net_dev)
977 	struct iface_stat *new_iface;
978 	struct iface_stat_work *isw;
980 	new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
981 	if (new_iface == NULL) {
982 		pr_err("qtaguid: iface_stat: create(%s): "
983 		       "iface_stat alloc failed\n", net_dev->name);
/* Copy the name: net_dev may be renamed/freed independently. */
986 	new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
987 	if (new_iface->ifname == NULL) {
988 		pr_err("qtaguid: iface_stat: create(%s): "
989 		       "ifname alloc failed\n", net_dev->name);
993 	spin_lock_init(&new_iface->tag_stat_list_lock);
994 	new_iface->tag_stat_tree = RB_ROOT;
995 	_iface_stat_set_active(new_iface, net_dev, true);
998 	 * ipv6 notifier chains are atomic :(
999 	 * No create_proc_read_entry() for you!
1001 	isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
1003 		pr_err("qtaguid: iface_stat: create(%s): "
1004 		       "work alloc failed\n", new_iface->ifname);
/* Roll back on failure: deactivate and free what we allocated. */
1005 		_iface_stat_set_active(new_iface, net_dev, false);
1006 		kfree(new_iface->ifname);
1010 	isw->iface_entry = new_iface;
1011 	INIT_WORK(&isw->iface_work, iface_create_proc_worker);
1012 	schedule_work(&isw->iface_work);
1013 	list_add(&new_iface->list, &iface_stat_list);
/*
 * Detect an interface whose dev stats went backwards (driver reset its
 * counters, e.g. on down/up) and fold the stashed last_known values into
 * the running totals so accounting stays monotonic.
 */
1017 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
1018 					       struct iface_stat *iface)
1020 	struct rtnl_link_stats64 dev_stats, *stats;
1023 	stats = dev_get_stats(net_dev, &dev_stats);
1024 	/* No empty packets */
/* Rewound == current dev counters are below our last snapshot. */
1026 		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
1027 		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
1029 	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
1030 		 "bytes rx/tx=%llu/%llu "
1031 		 "active=%d last_known=%d "
1032 		 "stats_rewound=%d\n", __func__,
1033 		 net_dev ? net_dev->name : "?",
1035 		 stats->rx_bytes, stats->tx_bytes,
1036 		 iface->active, iface->last_known_valid, stats_rewound);
1038 	if (iface->active && iface->last_known_valid && stats_rewound) {
1039 		pr_warn_once("qtaguid: iface_stat: %s(%s): "
1040 			     "iface reset its stats unexpectedly\n", __func__,
/* Credit the stashed snapshot into the totals before it is lost. */
1043 		iface->totals_via_dev[IFS_TX].bytes +=
1044 			iface->last_known[IFS_TX].bytes;
1045 		iface->totals_via_dev[IFS_TX].packets +=
1046 			iface->last_known[IFS_TX].packets;
1047 		iface->totals_via_dev[IFS_RX].bytes +=
1048 			iface->last_known[IFS_RX].bytes;
1049 		iface->totals_via_dev[IFS_RX].packets +=
1050 			iface->last_known[IFS_RX].packets;
1051 		iface->last_known_valid = false;
1052 		IF_DEBUG("qtaguid: %s(%s): iface=%p "
1053 			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
1054 			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
1055 			 iface->last_known[IFS_TX].bytes);
1060 * Create a new entry for tracking the specified interface.
1061 * Do nothing if the entry already exists.
1062 * Called when an interface is configured with a valid IP address.
1064 static void iface_stat_create(struct net_device *net_dev,
1065 			      struct in_ifaddr *ifa)
1067 	struct in_device *in_dev = NULL;
1069 	struct iface_stat *entry;
1071 	struct iface_stat *new_iface;
1073 	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1074 		 net_dev ? net_dev->name : "?",
1077 		pr_err("qtaguid: iface_stat: create(): no net dev\n");
1081 	ifname = net_dev->name;
/* No ifa supplied: look up the address matching this device label. */
1083 		in_dev = in_dev_get(net_dev);
1085 			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1089 		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1091 		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1092 			IF_DEBUG("qtaguid: iface_stat: create(%s): "
1093 				 "ifa=%p ifa_label=%s\n",
1095 				 ifa->ifa_label ? ifa->ifa_label : "(null)");
1096 			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1102 		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1106 	ipaddr = ifa->ifa_local;
1108 	spin_lock_bh(&iface_stat_list_lock);
1109 	entry = get_iface_entry(ifname);
1110 	if (entry != NULL) {
/* Existing entry: (re)activate unless it's loopback. */
1111 		bool activate = !ipv4_is_loopback(ipaddr);
1112 		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1114 		iface_check_stats_reset_and_adjust(net_dev, entry);
1115 		_iface_stat_set_active(entry, net_dev, activate);
1116 		IF_DEBUG("qtaguid: %s(%s): "
1117 			 "tracking now %d on ip=%pI4\n", __func__,
1118 			 entry->ifname, activate, &ipaddr);
1119 		goto done_unlock_put;
1120 	} else if (ipv4_is_loopback(ipaddr)) {
1121 		IF_DEBUG("qtaguid: iface_stat: create(%s): "
1122 			 "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
1123 		goto done_unlock_put;
1126 	new_iface = iface_alloc(net_dev);
1127 	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1128 		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1130 	spin_unlock_bh(&iface_stat_list_lock);
/*
 * IPv6 counterpart of iface_stat_create(): track/activate an interface
 * when it gets an IPv6 address, ignoring loopback addresses.
 */
1136 static void iface_stat_create_ipv6(struct net_device *net_dev,
1137 				   struct inet6_ifaddr *ifa)
1139 	struct in_device *in_dev;
1141 	struct iface_stat *entry;
1142 	struct iface_stat *new_iface;
1145 	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1146 		 ifa, net_dev, net_dev ? net_dev->name : "");
1148 		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1151 	ifname = net_dev->name;
1153 	in_dev = in_dev_get(net_dev);
1155 		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1160 	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1164 		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1168 	addr_type = ipv6_addr_type(&ifa->addr);
1170 	spin_lock_bh(&iface_stat_list_lock);
1171 	entry = get_iface_entry(ifname);
1172 	if (entry != NULL) {
/* Existing entry: (re)activate unless the address is ::1. */
1173 		bool activate = !(addr_type & IPV6_ADDR_LOOPBACK);
1174 		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1176 		iface_check_stats_reset_and_adjust(net_dev, entry);
1177 		_iface_stat_set_active(entry, net_dev, activate);
1178 		IF_DEBUG("qtaguid: %s(%s): "
1179 			 "tracking now %d on ip=%pI6c\n", __func__,
1180 			 entry->ifname, activate, &ifa->addr);
1181 		goto done_unlock_put;
1182 	} else if (addr_type & IPV6_ADDR_LOOPBACK) {
1183 		IF_DEBUG("qtaguid: %s(%s): "
1184 			 "ignore loopback dev. ip=%pI6c\n", __func__,
1185 			 ifname, &ifa->addr);
1186 		goto done_unlock_put;
1189 	new_iface = iface_alloc(net_dev);
1190 	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1191 		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1194 	spin_unlock_bh(&iface_stat_list_lock);
/* Lookup a sock_tag by socket; "_nl" = no-lock variant, caller must hold
 * sock_tag_list_lock. */
1199 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1201 	MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1202 	return sock_tag_tree_search(&sock_tag_tree, sk);
/* Locked wrapper around get_sock_stat_nl(). */
1205 static struct sock_tag *get_sock_stat(const struct sock *sk)
1207 	struct sock_tag *sock_tag_entry;
1208 	MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1211 	spin_lock_bh(&sock_tag_list_lock);
1212 	sock_tag_entry = get_sock_stat_nl(sk);
1213 	spin_unlock_bh(&sock_tag_list_lock);
1214 	return sock_tag_entry;
/*
 * Return the transport protocol of the skb for either IPv4 or IPv6;
 * falls back to IPPROTO_RAW (default case) when it cannot be determined.
 */
1217 static int ipx_proto(const struct sk_buff *skb,
1218 		     struct xt_action_param *par)
1220 	int thoff = 0, tproto;
1222 	switch (par->family) {
/* IPv6 needs a header walk to skip extension headers. */
1224 		tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1226 			MT_DEBUG("%s(): transport header not found in ipv6"
1227 				 " skb=%p\n", __func__, skb);
1230 		tproto = ip_hdr(skb)->protocol;
1233 		tproto = IPPROTO_RAW;
/*
 * Route the byte/packet increment to the TCP, UDP or "other" bucket.
 */
1239 data_counters_update(struct data_counters *dc, int set,
1240 		     enum ifs_tx_rx direction, int proto, int bytes)
1244 		dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1247 		dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1251 		dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1258 * Update stats for the specified interface. Do nothing if the entry
1259 * does not exist (when a device was never configured with an IP address).
1260 * Called when an device is being unregistered.
/* stash_only: snapshot dev counters into last_known[] without deactivating
 * (used so a later reset can be detected and compensated). */
1262 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1264 	struct rtnl_link_stats64 dev_stats, *stats;
1265 	struct iface_stat *entry;
1267 	stats = dev_get_stats(net_dev, &dev_stats);
1268 	spin_lock_bh(&iface_stat_list_lock);
1269 	entry = get_iface_entry(net_dev->name);
1270 	if (entry == NULL) {
1271 		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1273 		spin_unlock_bh(&iface_stat_list_lock);
1277 	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1278 		 net_dev->name, entry);
1279 	if (!entry->active) {
1280 		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1282 		spin_unlock_bh(&iface_stat_list_lock);
/* Stash path: remember the counters but keep the iface active. */
1287 		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1288 		entry->last_known[IFS_TX].packets = stats->tx_packets;
1289 		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1290 		entry->last_known[IFS_RX].packets = stats->rx_packets;
1291 		entry->last_known_valid = true;
1292 		IF_DEBUG("qtaguid: %s(%s): "
1293 			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1294 			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1295 		spin_unlock_bh(&iface_stat_list_lock);
/* Unregister path: fold dev counters into totals and deactivate. */
1298 	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1299 	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1300 	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1301 	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1302 	/* We don't need the last_known[] anymore */
1303 	entry->last_known_valid = false;
1304 	_iface_stat_set_active(entry, net_dev, false);
1305 	IF_DEBUG("qtaguid: %s(%s): "
1306 		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
1307 		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
1308 	spin_unlock_bh(&iface_stat_list_lock);
/*
 * Update stats for the specified interface from the skb.
 * Do nothing if the entry does not exist (when a device was never
 * configured with an IP address). Called on each sk.
 * NOTE(review): some source lines are missing from this extraction
 * (braces, returns, a declaration for "proto"); code tokens preserved
 * verbatim.
 */
static void iface_stat_update_from_skb(const struct sk_buff *skb,
				       struct xt_action_param *par)
	struct iface_stat *entry;
	const struct net_device *el_dev;
	enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
	int bytes = skb->len;

	MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
	/* Fall back to the hook's in/out device when skb->dev is unset. */
	el_dev = par->in ? : par->out;
	const struct net_device *other_dev;
	other_dev = par->in ? : par->out;
	if (el_dev != other_dev) {
		MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
			 "par->(in/out)=%p %s\n",
			 par->hooknum, el_dev, el_dev->name, other_dev,
	if (unlikely(!el_dev)) {
		pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
		       par->hooknum, __func__);
	} else if (unlikely(!el_dev->name)) {
		pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
		       par->hooknum, __func__);
	proto = ipx_proto(skb, par);
	MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
		 par->hooknum, el_dev->name, el_dev->type,
		 par->family, proto);

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(el_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
			 __func__, el_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 el_dev->name, entry);
	/* Account into the skb-derived totals (counter set 0). */
	data_counters_update(&entry->totals_via_skb, 0, direction, proto,
	spin_unlock_bh(&iface_stat_list_lock);
/*
 * tag_stat_update() - account @bytes/@proto in @direction against the
 * tag's currently-active counter set; also updates the parent (uid-only)
 * counters when present.
 * NOTE(review): extraction dropped some lines (declaration of active_set,
 * braces); code tokens preserved verbatim.
 */
static void tag_stat_update(struct tag_stat *tag_entry,
			    enum ifs_tx_rx direction, int proto, int bytes)
	active_set = get_active_counter_set(tag_entry->tn.tag);
	MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
		 "dir=%d proto=%d bytes=%d)\n",
		 tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
		 active_set, direction, proto, bytes);
	data_counters_update(&tag_entry->counters, active_set, direction,
	/* The {0, uid_tag} parent aggregates across all acct tags. */
	if (tag_entry->parent_counters)
		data_counters_update(tag_entry->parent_counters, active_set,
				     direction, proto, bytes);
/*
 * Create a new entry for tracking the specified {acct_tag,uid_tag} within
 * the interface.
 * iface_entry->tag_stat_list_lock should be held.
 * Returns the new entry; allocation is GFP_ATOMIC since we are under a
 * spinlock. NOTE(review): the second parameter line and some braces were
 * dropped by the extraction; code tokens preserved verbatim.
 */
static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
	struct tag_stat *new_tag_stat_entry = NULL;
	IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
		 " (uid=%u)\n", __func__,
		 iface_entry, tag, get_uid_from_tag(tag));
	new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
	if (!new_tag_stat_entry) {
		pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
	new_tag_stat_entry->tn.tag = tag;
	tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
	return new_tag_stat_entry;
/*
 * if_tag_stat_update() - account @bytes for (@ifname, @uid, @sk) in the
 * per-interface tag_stat tree, creating the {0,uid_tag} parent and the
 * {acct_tag,uid_tag} child entries on demand.
 * NOTE(review): extraction dropped lines (braces, returns and at least
 * one declaration, e.g. uid_tag); code tokens preserved verbatim.
 */
static void if_tag_stat_update(const char *ifname, uid_t uid,
			       const struct sock *sk, enum ifs_tx_rx direction,
			       int proto, int bytes)
	struct tag_stat *tag_stat_entry;
	tag_t tag, acct_tag;
	struct data_counters *uid_tag_counters;
	struct sock_tag *sock_tag_entry;
	struct iface_stat *iface_entry;
	struct tag_stat *new_tag_stat = NULL;
	MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
		 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
		 ifname, uid, sk, direction, proto, bytes);
	iface_entry = get_iface_entry(ifname);
	pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
	/* It is ok to process data when an iface_entry is inactive */
	MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
		 ifname, iface_entry);
	/*
	 * Look for a tagged sock.
	 * It will have an acct_uid.
	 */
	sock_tag_entry = get_sock_stat(sk);
	if (sock_tag_entry) {
		tag = sock_tag_entry->tag;
		acct_tag = get_atag_from_tag(tag);
		uid_tag = get_utag_from_tag(tag);
	/* Untagged sock: fall back to a {0, uid} tag. */
	acct_tag = make_atag_from_value(0);
	tag = combine_atag_with_uid(acct_tag, uid);
	uid_tag = make_tag_from_uid(uid);
	MT_DEBUG("qtaguid: iface_stat: stat_update(): "
		 " looking for tag=0x%llx (uid=%u) in ife=%p\n",
		 tag, get_uid_from_tag(tag), iface_entry);
	/* Loop over tag list under this interface for {acct_tag,uid_tag} */
	spin_lock_bh(&iface_entry->tag_stat_list_lock);
	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
	if (tag_stat_entry) {
		/*
		 * Updating the {acct_tag, uid_tag} entry handles both stats:
		 * {0, uid_tag} will also get updated.
		 */
		tag_stat_update(tag_stat_entry, direction, proto, bytes);
		spin_unlock_bh(&iface_entry->tag_stat_list_lock);

	/* Loop over tag list under this interface for {0,uid_tag} */
	tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
	if (!tag_stat_entry) {
		/* Here: the base uid_tag did not exist */
		/*
		 * No parent counters. So
		 * - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
		 */
		new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
		uid_tag_counters = &new_tag_stat->counters;
	uid_tag_counters = &tag_stat_entry->counters;

	/* Create the child {acct_tag, uid_tag} and hook up parent. */
	new_tag_stat = create_if_tag_stat(iface_entry, tag);
	new_tag_stat->parent_counters = uid_tag_counters;
	/*
	 * For new_tag_stat to be still NULL here would require:
	 *  {0, uid_tag} exists
	 *  and {acct_tag, uid_tag} doesn't exist
	 *  AND acct_tag == 0.
	 * Impossible. This reassures us that new_tag_stat
	 * below will always be assigned.
	 */
	BUG_ON(!new_tag_stat);
	tag_stat_update(new_tag_stat, direction, proto, bytes);
	spin_unlock_bh(&iface_entry->tag_stat_list_lock);
/*
 * Netdevice notifier callback: create iface tracking when a device comes
 * up / registers, and stash (DOWN) or fold (UNREGISTER) its stats.
 * No-op when module_passive is set.
 * NOTE(review): extraction dropped case labels/returns; code tokens
 * preserved verbatim.
 */
static int iface_netdev_event_handler(struct notifier_block *nb,
				      unsigned long event, void *ptr) {
	struct net_device *dev = ptr;

	if (unlikely(module_passive))

	IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
		 "ev=0x%lx/%s netdev=%p->name=%s\n",
		 event, netdev_evt_str(event), dev, dev ? dev->name : "");

	iface_stat_create(dev, NULL);
	atomic64_inc(&qtu_events.iface_events);

	case NETDEV_UNREGISTER:
		/* Stash readings on DOWN; fold + deactivate on UNREGISTER. */
		iface_stat_update(dev, event == NETDEV_DOWN);
		atomic64_inc(&qtu_events.iface_events);
/*
 * IPv6 address notifier callback: start tracking the iface when an
 * address appears; stash or fold its stats on DOWN / UNREGISTER.
 * No-op when module_passive is set.
 * NOTE(review): extraction dropped case labels/returns; code tokens
 * preserved verbatim.
 */
static int iface_inet6addr_event_handler(struct notifier_block *nb,
					 unsigned long event, void *ptr)
	struct inet6_ifaddr *ifa = ptr;
	struct net_device *dev;

	if (unlikely(module_passive))

	IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
		 "ev=0x%lx/%s ifa=%p\n",
		 event, netdev_evt_str(event), ifa);

	BUG_ON(!ifa || !ifa->idev);
	dev = (struct net_device *)ifa->idev->dev;
	iface_stat_create_ipv6(dev, ifa);
	atomic64_inc(&qtu_events.iface_events);

	case NETDEV_UNREGISTER:
		BUG_ON(!ifa || !ifa->idev);
		dev = (struct net_device *)ifa->idev->dev;
		iface_stat_update(dev, event == NETDEV_DOWN);
		atomic64_inc(&qtu_events.iface_events);
/*
 * IPv4 address notifier callback: start tracking the iface when an
 * address appears; stash or fold its stats on DOWN / UNREGISTER.
 * No-op when module_passive is set.
 * NOTE(review): extraction dropped case labels/returns; code tokens
 * preserved verbatim.
 */
static int iface_inetaddr_event_handler(struct notifier_block *nb,
					unsigned long event, void *ptr)
	struct in_ifaddr *ifa = ptr;
	struct net_device *dev;

	if (unlikely(module_passive))

	IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
		 "ev=0x%lx/%s ifa=%p\n",
		 event, netdev_evt_str(event), ifa);

	BUG_ON(!ifa || !ifa->ifa_dev);
	dev = ifa->ifa_dev->dev;
	iface_stat_create(dev, ifa);
	atomic64_inc(&qtu_events.iface_events);

	case NETDEV_UNREGISTER:
		BUG_ON(!ifa || !ifa->ifa_dev);
		dev = ifa->ifa_dev->dev;
		iface_stat_update(dev, event == NETDEV_DOWN);
		atomic64_inc(&qtu_events.iface_events);
/*
 * Notifier registrations hooking the three handlers above into netdev,
 * IPv4-address and IPv6-address event chains.
 * NOTE(review): the closing "};" lines were dropped by the extraction.
 */
static struct notifier_block iface_netdev_notifier_blk = {
	.notifier_call = iface_netdev_event_handler,

static struct notifier_block iface_inetaddr_notifier_blk = {
	.notifier_call = iface_inetaddr_event_handler,

static struct notifier_block iface_inet6addr_notifier_blk = {
	.notifier_call = iface_inet6addr_event_handler,
/*
 * iface_stat_init() - create the iface_stat proc entries under
 * @parent_procdir and register the netdev + IPv4/IPv6 address notifiers.
 * Unwinds partial setup via the goto labels on failure.
 * NOTE(review): extraction dropped lines (declarations, braces, returns,
 * some labels); code tokens preserved verbatim.
 */
static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
	if (!iface_stat_procdir) {
		pr_err("qtaguid: iface_stat: init failed to create proc entry\n");

	/* Legacy "fmt1" listing, served by the shared read_proc callback. */
	iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
	if (!iface_stat_all_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_old proc entry\n");
	iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
	iface_stat_all_procfile->data = (void *)1; /* fmt1 */

	/* Newer "fmt2" listing shares the same read callback. */
	iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
	if (!iface_stat_fmt_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_all proc entry\n");
		goto err_zap_all_stats_entry;
	iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
	iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */

	err = register_netdevice_notifier(&iface_netdev_notifier_blk);
	pr_err("qtaguid: iface_stat: init "
	       "failed to register dev event handler\n");
	goto err_zap_all_stats_entries;
	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
	pr_err("qtaguid: iface_stat: init "
	       "failed to register ipv4 dev event handler\n");
	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
	pr_err("qtaguid: iface_stat: init "
	       "failed to register ipv6 dev event handler\n");
	goto err_unreg_ip4_addr;

	/* Error unwind: release resources in reverse acquisition order. */
	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
	unregister_netdevice_notifier(&iface_netdev_notifier_blk);
err_zap_all_stats_entries:
	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
err_zap_all_stats_entry:
	remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
	remove_proc_entry(iface_stat_procdirname, parent_procdir);
/*
 * qtaguid_find_sk() - locate the struct sock owning @skb via the
 * xt_socket helpers. Only used from the supported hooks
 * (XT_SOCKET_SUPPORTED_HOOKS); TCP_TIME_WAIT sockets are released and
 * not returned because their file pointer is unreliable (see URL below).
 * NOTE(review): extraction dropped lines (case labels, returns, sk
 * declaration); code tokens preserved verbatim.
 */
static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
				    struct xt_action_param *par)
	unsigned int hook_mask = (1 << par->hooknum);

	MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
		 par->hooknum, par->family);

	/*
	 * Let's not abuse the xt_socket_get*_sk(), or else it will
	 * return garbage SKs.
	 */
	if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))

	switch (par->family) {
	sk = xt_socket_get6_sk(skb, par);
	sk = xt_socket_get4_sk(skb, par);

	/*
	 * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
	 * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
	 * Not fixed in 3.0-r3 :(
	 */
	MT_DEBUG("qtaguid: %p->sk_proto=%u "
		 "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
	if (sk->sk_state == TCP_TIME_WAIT) {
		xt_socket_put_sk(sk);
/*
 * account_for_uid() - charge @skb's bytes to @uid on the device the
 * packet traversed, using @alternate_sk when skb->sk is not set.
 * NOTE(review): extraction dropped lines (braces, trailing arguments);
 * code tokens preserved verbatim.
 */
static void account_for_uid(const struct sk_buff *skb,
			    const struct sock *alternate_sk, uid_t uid,
			    struct xt_action_param *par)
	const struct net_device *el_dev;

	MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
	/* Fall back to the hook's in/out device when skb->dev is unset. */
	el_dev = par->in ? : par->out;
	const struct net_device *other_dev;
	other_dev = par->in ? : par->out;
	if (el_dev != other_dev) {
		MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
			 "par->(in/out)=%p %s\n",
			 par->hooknum, el_dev, el_dev->name, other_dev,

	if (unlikely(!el_dev)) {
		pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
	} else if (unlikely(!el_dev->name)) {
		pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
	int proto = ipx_proto(skb, par);
	MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
		 par->hooknum, el_dev->name, el_dev->type,
		 par->family, proto);

	if_tag_stat_update(el_dev->name, uid,
			   skb->sk ? skb->sk : alternate_sk,
			   par->in ? IFS_RX : IFS_TX,
/*
 * qtaguid_mt() - the xt_qtaguid match function. Accounts the packet
 * against interface and uid/tag counters, then evaluates the
 * uid/gid/socket conditions requested via info->match / info->invert.
 * NOTE(review): extraction dropped lines (declarations of sk/res/
 * sock_uid, braces, labels); code tokens preserved verbatim.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
	const struct xt_qtaguid_match_info *info = par->matchinfo;
	const struct file *filp;
	bool got_sock = false;

	if (unlikely(module_passive))
		return (info->match ^ info->invert) == 0;

	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
		 par->hooknum, skb, par->in, par->out, par->family);

	atomic64_inc(&qtu_events.match_calls);

	/* Default result for packets not fully handled below. */
	res = (info->match ^ info->invert) == 0;

	switch (par->hooknum) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		atomic64_inc(&qtu_events.match_calls_prepost);
		iface_stat_update_from_skb(skb, par);
		/*
		 * We are done in pre/post. The skb will get processed
		 * elsewhere.
		 */
		res = (info->match ^ info->invert);
	/* default: Fall through and do UID related work */

	/*
	 * A missing sk->sk_socket happens when packets are in-flight
	 * and the matching socket is already closed and gone.
	 */
	sk = qtaguid_find_sk(skb, par);
	/*
	 * If we got the socket from the find_sk(), we will need to put
	 * it back, as nf_tproxy_get_sock_v4() got it.
	 */
	atomic64_inc(&qtu_events.match_found_sk_in_ct);
	atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
	atomic64_inc(&qtu_events.match_found_sk);
	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
	MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
		 par->hooknum, sk, sk->sk_socket,
		 sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
	filp = sk->sk_socket ? sk->sk_socket->file : NULL;
	MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
		 par->hooknum, filp ? filp->f_cred->fsuid : -1);

	if (sk == NULL || sk->sk_socket == NULL) {
		/*
		 * Here, the qtaguid_find_sk() using connection tracking
		 * couldn't find the owner, so for now we just count them
		 * against the system.
		 */
		/*
		 * TODO: unhack how to force just accounting.
		 * For now we only do iface stats when the uid-owner is not
		 * requested.
		 */
		if (!(info->match & XT_QTAGUID_UID))
			account_for_uid(skb, sk, 0, par);
		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
			 sk ? sk->sk_socket : NULL);
		res = (info->match ^ info->invert) == 0;
		atomic64_inc(&qtu_events.match_no_sk);
		goto put_sock_ret_res;
	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
		goto put_sock_ret_res;
	filp = sk->sk_socket->file;
	/* Socket exists but has no file: only uid/gid conditions can fail. */
	MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
	account_for_uid(skb, sk, 0, par);
	res = ((info->match ^ info->invert) &
	       (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
	atomic64_inc(&qtu_events.match_no_sk_file);
	goto put_sock_ret_res;

	sock_uid = filp->f_cred->fsuid;
	/*
	 * TODO: unhack how to force just accounting.
	 * For now we only do iface stats when the uid-owner is not requested
	 */
	if (!(info->match & XT_QTAGUID_UID))
		account_for_uid(skb, sk, sock_uid, par);

	/*
	 * The following two tests fail the match when:
	 *    id not in range AND no inverted condition requested
	 * or id in range AND inverted condition requested
	 * Thus (!a && b) || (a && !b) == a ^ b
	 */
	if (info->match & XT_QTAGUID_UID)
		if ((filp->f_cred->fsuid >= info->uid_min &&
		     filp->f_cred->fsuid <= info->uid_max) ^
		    !(info->invert & XT_QTAGUID_UID)) {
			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
			goto put_sock_ret_res;
	if (info->match & XT_QTAGUID_GID)
		if ((filp->f_cred->fsgid >= info->gid_min &&
		     filp->f_cred->fsgid <= info->gid_max) ^
		    !(info->invert & XT_QTAGUID_GID)) {
			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
			goto put_sock_ret_res;

	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
	/* Release the sk reference taken by qtaguid_find_sk() if we got it. */
	xt_socket_put_sk(sk);
	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
/* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * prdebug_full_state() - dump the module's full internal state (sock tag
 * tree, uid tag data, proc qtu data, iface stat list) via pr_debug,
 * taking each structure's lock while printing it. Only active when
 * DDEBUG_MASK is set in qtaguid_debug_mask.
 * NOTE(review): extraction dropped lines (va_list/buffer declarations,
 * kfree calls, braces); code tokens preserved verbatim.
 */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))

	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
	pr_debug("%s", buff);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
	spin_unlock_bh(&sock_tag_list_lock);

	/* Nested order here: sock_tag_list_lock, then uid_tag_data_tree_lock. */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
1944 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
/*
 * Procfs reader to get all active socket tags using style "1)" as described in
 * the read_proc conventions. Emits one line per tagged socket, then a
 * summary line with the global event counters, and finally (debug builds)
 * dumps the full internal state.
 * NOTE(review): extraction dropped lines (several declarations, braces,
 * returns, parts of format strings); code tokens preserved verbatim.
 */
static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
				  off_t items_to_skip, int char_count, int *eof,
	struct rb_node *node;
	struct sock_tag *sock_tag_entry;
	int indent_level = 0;

	if (unlikely(module_passive)) {

	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
		 "page=%p off=%ld char_count=%d *eof=%d\n",
		 current->pid, current->tgid, current_fsuid(),
		 page, items_to_skip, char_count, *eof);

	/* One output line per tagged socket, honoring items_to_skip. */
	spin_lock_bh(&sock_tag_list_lock);
	for (node = rb_first(&sock_tag_tree);
	     node = rb_next(node)) {
		if (item_index++ < items_to_skip)
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		uid = get_uid_from_tag(sock_tag_entry->tag);
		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
			 sock_tag_entry->tag,
		f_count = atomic_long_read(
			&sock_tag_entry->socket->file->f_count);
		len = snprintf(outp, char_count,
			       "sock=%p tag=0x%llx (uid=%u) pid=%u "
			       sock_tag_entry->tag, uid,
			       sock_tag_entry->pid, f_count);
		if (len >= char_count) {
			spin_unlock_bh(&sock_tag_list_lock);
		(*num_items_returned)++;
	spin_unlock_bh(&sock_tag_list_lock);

	/* Trailing summary line with the global event counters. */
	if (item_index++ >= items_to_skip) {
		len = snprintf(outp, char_count,
			       "events: sockets_tagged=%llu "
			       "sockets_untagged=%llu "
			       "counter_set_changes=%llu "
			       "iface_events=%llu "
			       "match_calls_prepost=%llu "
			       "match_found_sk=%llu "
			       "match_found_sk_in_ct=%llu "
			       "match_found_no_sk_in_ct=%llu "
			       "match_no_sk_file=%llu\n",
			       atomic64_read(&qtu_events.sockets_tagged),
			       atomic64_read(&qtu_events.sockets_untagged),
			       atomic64_read(&qtu_events.counter_set_changes),
			       atomic64_read(&qtu_events.delete_cmds),
			       atomic64_read(&qtu_events.iface_events),
			       atomic64_read(&qtu_events.match_calls),
			       atomic64_read(&qtu_events.match_calls_prepost),
			       atomic64_read(&qtu_events.match_found_sk),
			       atomic64_read(&qtu_events.match_found_sk_in_ct),
			       atomic64_read(
				       &qtu_events.match_found_no_sk_in_ct),
			       atomic64_read(&qtu_events.match_no_sk),
			       atomic64_read(&qtu_events.match_no_sk_file));
		if (len >= char_count) {
		(*num_items_returned)++;

	/* Count the following as part of the last item_index */
	if (item_index > items_to_skip) {
		prdebug_full_state(indent_level, "proc ctrl");
/*
 * Delete socket tags, and stat tags associated with a given
 * accounting tag and uid.
 * Privilege: the caller may only delete its own uid's entries unless
 * can_impersonate_uid() allows otherwise.
 * NOTE(review): extraction dropped lines (declarations of argc/cmd/tag/
 * acct_tag/uid/entry_uid, braces, returns, continue statements); code
 * tokens preserved verbatim.
 */
static int ctrl_cmd_delete(const char *input)
	struct iface_stat *iface_entry;
	struct rb_node *node;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct tag_stat *ts_entry;
	struct tag_counter_set *tcs_entry;
	struct tag_ref *tr_entry;
	struct uid_tag_data *utd_entry;

	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
	if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
	/* No uid given: operate on the caller's own uid. */
	uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_delete(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());

	tag = combine_atag_with_uid(acct_tag, uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): "
		 "looking for tag=0x%llx (uid=%u)\n",

	/* Delete socket tags */
	spin_lock_bh(&sock_tag_list_lock);
	node = rb_first(&sock_tag_tree);
	st_entry = rb_entry(node, struct sock_tag, sock_node);
	entry_uid = get_uid_from_tag(st_entry->tag);
	node = rb_next(node);
	if (entry_uid != uid)

	CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
		 input, st_entry->tag, entry_uid);

	if (!acct_tag || st_entry->tag == tag) {
		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);
		tr_entry = lookup_tag_ref(st_entry->tag, NULL);
		BUG_ON(tr_entry->num_sock_tags <= 0);
		tr_entry->num_sock_tags--;
		/*
		 * TODO: remove if, and start failing.
		 * This is a hack to work around the fact that in some
		 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
		 * and are trying to work around apps
		 * that didn't open the /dev/xt_qtaguid.
		 */
		if (st_entry->list.next && st_entry->list.prev)
			list_del(&st_entry->list);
	spin_unlock_bh(&sock_tag_list_lock);

	sock_tag_tree_erase(&st_to_free_tree);

	/* Delete tag counter-sets */
	spin_lock_bh(&tag_counter_set_list_lock);
	/* Counter sets are only on the uid tag, not full tag */
	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	CT_DEBUG("qtaguid: ctrl_delete(%s): "
		 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
		 get_uid_from_tag(tcs_entry->tn.tag),
		 tcs_entry->active_set);
	rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
	spin_unlock_bh(&tag_counter_set_list_lock);

	/*
	 * If acct_tag is 0, then all entries belonging to uid are
	 * erased.
	 */
	spin_lock_bh(&iface_stat_list_lock);
	list_for_each_entry(iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&iface_entry->tag_stat_list_lock);
		node = rb_first(&iface_entry->tag_stat_tree);
		ts_entry = rb_entry(node, struct tag_stat, tn.node);
		entry_uid = get_uid_from_tag(ts_entry->tn.tag);
		node = rb_next(node);

		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "ts tag=0x%llx (uid=%u)\n",
			 input, ts_entry->tn.tag, entry_uid);

		if (entry_uid != uid)
		if (!acct_tag || ts_entry->tn.tag == tag) {
			CT_DEBUG("qtaguid: ctrl_delete(%s): "
				 "erase ts: %s 0x%llx %u\n",
				 input, iface_entry->ifname,
				 get_atag_from_tag(ts_entry->tn.tag),
			rb_erase(&ts_entry->tn.node,
				 &iface_entry->tag_stat_tree);
		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
	spin_unlock_bh(&iface_stat_list_lock);

	/* Cleanup the uid_tag_data */
	spin_lock_bh(&uid_tag_data_tree_lock);
	node = rb_first(&uid_tag_data_tree);
	utd_entry = rb_entry(node, struct uid_tag_data, node);
	entry_uid = utd_entry->uid;
	node = rb_next(node);

	CT_DEBUG("qtaguid: ctrl_delete(%s): "
	if (entry_uid != uid)
	/*
	 * Go over the tag_refs, and those that don't have
	 * sock_tags using them are freed.
	 */
	put_tag_ref_tree(tag, utd_entry);
	put_utd_entry(utd_entry);
	spin_unlock_bh(&uid_tag_data_tree_lock);

	atomic64_inc(&qtu_events.delete_cmds);
/*
 * ctrl_cmd_counter_set() - switch the active counter set for a uid's tag,
 * allocating the tag_counter_set node on first use. Requires
 * can_manipulate_uids() privilege.
 * NOTE(review): extraction dropped lines (declarations of argc/cmd/
 * counter_set/uid/tag, braces, returns); code tokens preserved verbatim.
 */
static int ctrl_cmd_counter_set(const char *input)
	struct tag_counter_set *tcs;

	argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
	CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
		 "set=%d uid=%u\n", input, argc, cmd,
	if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
		pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
	if (!can_manipulate_uids()) {
		pr_info("qtaguid: ctrl_counterset(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());

	/* Counter sets are keyed by the uid-only tag. */
	tag = make_tag_from_uid(uid);
	spin_lock_bh(&tag_counter_set_list_lock);
	tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
	spin_unlock_bh(&tag_counter_set_list_lock);
	pr_err("qtaguid: ctrl_counterset(%s): "
	       "failed to alloc counter set\n",
	tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
	CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
		 "(uid=%u) set=%d\n",
		 input, tag, get_uid_from_tag(tag), counter_set);
	tcs->active_set = counter_set;
	spin_unlock_bh(&tag_counter_set_list_lock);
	atomic64_inc(&qtu_events.counter_set_changes);
/*
 * ctrl_cmd_tag() - tag (or re-tag) the socket behind @sock_fd with
 * {acct_tag, uid}. Takes a file reference via sockfd_lookup() which is
 * kept until untag; re-tagging drops the previously-held reference.
 * NOTE(review): extraction dropped lines (declarations of argc/cmd/
 * sock_fd/uid/full_tag/res, braces, returns, labels); code tokens
 * preserved verbatim.
 */
static int ctrl_cmd_tag(const char *input)
	tag_t acct_tag = make_atag_from_value(0);
	struct socket *el_socket;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *uid_tag_data_entry;
	struct proc_qtu_data *pqd_entry;

	/* Unassigned args will get defaulted later. */
	argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
		 "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
	el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
	pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
		" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
		input, sock_fd, res, current->pid, current->tgid,
	CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
	/* No acct_tag given: default to 0. */
	acct_tag = make_atag_from_value(0);
	} else if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
	CT_DEBUG("qtaguid: ctrl_tag(%s): "
		 "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
		 "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
		 input, current->pid, current->tgid, current_uid(),
		 current_euid(), current_fsuid(),
		 xt_qtaguid_ctrl_file->gid,
		 in_group_p(xt_qtaguid_ctrl_file->gid),
		 in_egroup_p(xt_qtaguid_ctrl_file->gid));
	/* No uid given: tag on behalf of the caller. */
	uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_tag(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());

	full_tag = combine_atag_with_uid(acct_tag, uid);

	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
	if (IS_ERR(tag_ref_entry)) {
		res = PTR_ERR(tag_ref_entry);
		spin_unlock_bh(&sock_tag_list_lock);
	tag_ref_entry->num_sock_tags++;
	if (sock_tag_entry) {
		struct tag_ref *prev_tag_ref_entry;

		CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
			 "st@%p ...->f_count=%ld\n",
			 input, el_socket->sk, sock_tag_entry,
			 atomic_long_read(&el_socket->file->f_count));
		/*
		 * This is a re-tagging, so release the sock_fd that was
		 * locked at the time of the 1st tagging.
		 * There is still the ref from this call's sockfd_lookup() so
		 * it can be done within the spinlock.
		 */
		sockfd_put(sock_tag_entry->socket);
		prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
						    &uid_tag_data_entry);
		BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
		BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
		prev_tag_ref_entry->num_sock_tags--;
		sock_tag_entry->tag = full_tag;
	/* First tagging of this socket: allocate a new sock_tag entry. */
	CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
		 input, el_socket->sk);
	sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
	if (!sock_tag_entry) {
		pr_err("qtaguid: ctrl_tag(%s): "
		       "socket tag alloc failed\n",
		spin_unlock_bh(&sock_tag_list_lock);
		goto err_tag_unref_put;
	sock_tag_entry->sk = el_socket->sk;
	sock_tag_entry->socket = el_socket;
	sock_tag_entry->pid = current->tgid;
	sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
	spin_lock_bh(&uid_tag_data_tree_lock);
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, current->tgid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry))
		     "User space forgot to open /dev/xt_qtaguid? "
		     "pid=%u tgid=%u uid=%u\n", __func__,
		     current->pid, current->tgid,
	list_add(&sock_tag_entry->list,
		 &pqd_entry->sock_tag_list);
	spin_unlock_bh(&uid_tag_data_tree_lock);

	sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
	atomic64_inc(&qtu_events.sockets_tagged);
	spin_unlock_bh(&sock_tag_list_lock);
	/* We keep the ref to the socket (file) until it is untagged */
	CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count));

	/* Error unwind: undo the tag_ref taken above. */
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	tag_ref_entry->num_sock_tags--;
	free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);

	CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);

	CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
/*
 * ctrl_cmd_untag() - remove the tag from the socket behind @sock_fd,
 * dropping the file reference taken at tag time plus the one from this
 * call's sockfd_lookup().
 * NOTE(review): extraction dropped lines (declarations of argc/cmd/
 * sock_fd/res, braces, returns, labels); code tokens preserved verbatim.
 */
static int ctrl_cmd_untag(const char *input)
	struct socket *el_socket;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;

	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
		 input, argc, cmd, sock_fd);

	el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
	pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
		" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
		input, sock_fd, res, current->pid, current->tgid,
	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	spin_lock_bh(&uid_tag_data_tree_lock);
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, current->tgid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry))
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u uid=%u\n", __func__,
			     current->pid, current->tgid, current_fsuid());
	list_del(&sock_tag_entry->list);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sock_fd that was grabbed at tag time,
	 * and once more for the sockfd_lookup() here.
	 */
	sockfd_put(sock_tag_entry->socket);
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count) - 1);
	sockfd_put(el_socket);

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	/* Error path: only the lookup's own reference is released. */
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);

	CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
/*
 * Dispatch a ctrl command string to its handler based on the leading
 * command character.
 *
 * NOTE(review): fragmentary extract — the switch/case labels selecting
 * between delete/counter_set/tag/untag are not visible here.
 */
2526 static int qtaguid_ctrl_parse(const char *input, int count)
2531 CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2532 input, current->pid, current->tgid, current_fsuid());
2535 /* Collect params for commands */
2538 res = ctrl_cmd_delete(input);
2542 res = ctrl_cmd_counter_set(input);
2546 res = ctrl_cmd_tag(input);
2550 res = ctrl_cmd_untag(input);
2560 CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2564 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
/*
 * procfs write handler for xt_qtaguid/ctrl: copy the user command into
 * a bounded stack buffer, NUL-terminate it and parse it.
 *
 * NOTE(review): fragmentary extract — the return statements for the
 * passive/oversize/copy-failure branches are not visible here.
 */
2565 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2566 unsigned long count, void *data)
2568 char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
/* In passive mode the module accepts but ignores control input. */
2570 if (unlikely(module_passive))
/* Reject input that would not leave room for the terminating NUL. */
2573 if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2576 if (copy_from_user(input_buf, buffer, count))
/* Safe: count < MAX_QTAGUID_CTRL_INPUT_LEN was checked above. */
2579 input_buf[count] = '\0';
2580 return qtaguid_ctrl_parse(input_buf, count);
/*
 * Cursor/state carried through the procfs stats dump
 * (qtaguid_stats_proc_read -> pp_sets -> pp_stats_line).
 *
 * NOTE(review): fragmentary extract — other members used below
 * (outp, char_count, item_index, items_to_skip) are not visible here.
 */
2583 struct proc_print_info {
2585 char **num_items_returned;
2586 struct iface_stat *iface_entry;
2587 struct tag_stat *ts_entry;
/*
 * Format one line of the stats dump into ppi->outp.
 * Item index 0 is the column-header line; every other item is one
 * (iface, tag, counter-set) row. Returns the snprintf length
 * (0 when the item was skipped via items_to_skip).
 *
 * NOTE(review): fragmentary extract — braces, returns and part of the
 * snprintf argument list are not visible here.
 */
2593 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2596 struct data_counters *cnts;
/* First item: emit the header row instead of data. */
2598 if (!ppi->item_index) {
2599 if (ppi->item_index++ < ppi->items_to_skip)
2601 len = snprintf(ppi->outp, ppi->char_count,
2602 "idx iface acct_tag_hex uid_tag_int cnt_set "
2603 "rx_bytes rx_packets "
2604 "tx_bytes tx_packets "
2605 "rx_tcp_bytes rx_tcp_packets "
2606 "rx_udp_bytes rx_udp_packets "
2607 "rx_other_bytes rx_other_packets "
2608 "tx_tcp_bytes tx_tcp_packets "
2609 "tx_udp_bytes tx_udp_packets "
2610 "tx_other_bytes tx_other_packets\n");
2612 tag_t tag = ppi->ts_entry->tn.tag;
2613 uid_t stat_uid = get_uid_from_tag(tag);
2614 /* Detailed tags are not available to everybody */
/* Rows with a non-zero acct tag are hidden from readers lacking
 * can_read_other_uid_stats() permission for that uid. */
2615 if (get_atag_from_tag(tag)
2616 && !can_read_other_uid_stats(stat_uid)) {
2617 CT_DEBUG("qtaguid: stats line: "
2618 "%s 0x%llx %u: insufficient priv "
2619 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2620 ppi->iface_entry->ifname,
2621 get_atag_from_tag(tag), stat_uid,
2622 current->pid, current->tgid, current_fsuid(),
2623 xt_qtaguid_stats_file->gid)
/* Pagination: consume the index but emit nothing while skipping. */
2626 if (ppi->item_index++ < ppi->items_to_skip)
2628 cnts = &ppi->ts_entry->counters;
2630 ppi->outp, ppi->char_count,
2631 "%d %s 0x%llx %u %u "
2641 ppi->iface_entry->ifname,
2642 get_atag_from_tag(tag),
/* Aggregated totals first, then the per-protocol breakdown. */
2645 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2646 dc_sum_packets(cnts, cnt_set, IFS_RX),
2647 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2648 dc_sum_packets(cnts, cnt_set, IFS_TX),
2649 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2650 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2651 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2652 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2653 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2654 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2655 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2656 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2657 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2658 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2659 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2660 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
/*
 * Print one stats line per counter set for the current tag_stat entry.
 * Returns false (per the failure branch below) when a line no longer
 * fits in the remaining buffer, so the caller can stop the dump.
 *
 * NOTE(review): fragmentary extract — loop increment, buffer-advance
 * and return statements are not visible here.
 */
2665 static bool pp_sets(struct proc_print_info *ppi)
2669 for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2671 len = pp_stats_line(ppi, counter_set);
/* Line did not fit: abort so the truncated line is not emitted. */
2672 if (len >= ppi->char_count) {
2678 ppi->char_count -= len;
2679 (*ppi->num_items_returned)++;
2686 * Procfs reader to get all tag stats using style "1)" as described in
2688 * Groups all protocols tx/rx bytes.
/*
 * Walks every iface_stat entry and, under each, every tag_stat in its
 * rb-tree, emitting one line per counter set. Holds
 * iface_stat_list_lock with each iface's tag_stat_list_lock nested
 * inside. Returns the number of bytes written to 'page'.
 *
 * NOTE(review): fragmentary extract — ppi.outp initialization and the
 * passive-mode early return are not fully visible here.
 */
2690 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2691 off_t items_to_skip, int char_count, int *eof,
2694 struct proc_print_info ppi;
2699 ppi.char_count = char_count;
2700 ppi.num_items_returned = num_items_returned;
2701 ppi.items_to_skip = items_to_skip;
/* Passive mode: emit only the header line and finish. */
2703 if (unlikely(module_passive)) {
2704 len = pp_stats_line(&ppi, 0);
2705 /* The header should always be shorter than the buffer. */
2706 BUG_ON(len >= ppi.char_count);
2707 (*num_items_returned)++;
2712 CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
2713 "page=%p *num_items_returned=%p off=%ld "
2714 "char_count=%d *eof=%d\n",
2715 current->pid, current->tgid, current_fsuid(),
2716 page, *num_items_returned,
2717 items_to_skip, char_count, *eof);
2722 /* The idx is there to help debug when things go belly up. */
2723 len = pp_stats_line(&ppi, 0);
2724 /* Don't advance the outp unless the whole line was printed */
2725 if (len >= ppi.char_count) {
2727 return ppi.outp - page;
2731 ppi.char_count -= len;
2732 (*num_items_returned)++;
2735 spin_lock_bh(&iface_stat_list_lock);
2736 list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2737 struct rb_node *node;
/* Per-iface lock nests inside the list lock. */
2738 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2739 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2741 node = rb_next(node)) {
2742 ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
/* Buffer full mid-dump: drop both locks and return what fits. */
2743 if (!pp_sets(&ppi)) {
2745 &ppi.iface_entry->tag_stat_list_lock);
2746 spin_unlock_bh(&iface_stat_list_lock);
2747 return ppi.outp - page;
2750 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2752 spin_unlock_bh(&iface_stat_list_lock);
2755 return ppi.outp - page;
2758 /*------------------------------------------*/
/*
 * open() handler for /dev/xt_qtaguid: create (or reuse) the per-uid
 * uid_tag_data and attach a fresh per-tgid proc_qtu_data so socket
 * tags can be cleaned up when the process dies.
 *
 * NOTE(review): fragmentary extract — some early returns and the
 * kfree()/"return res" tail of the error path are not visible here.
 */
2759 static int qtudev_open(struct inode *inode, struct file *file)
2761 struct uid_tag_data *utd_entry;
2762 struct proc_qtu_data *pqd_entry;
2763 struct proc_qtu_data *new_pqd_entry;
2765 bool utd_entry_found;
2767 if (unlikely(qtu_proc_handling_passive))
2770 DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2771 current->pid, current->tgid, current_fsuid());
/* All tree lookups/inserts below happen under this lock. */
2773 spin_lock_bh(&uid_tag_data_tree_lock);
2775 /* Look for existing uid data, or alloc one. */
2776 utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2777 if (IS_ERR_OR_NULL(utd_entry)) {
2778 res = PTR_ERR(utd_entry);
2782 /* Look for existing PID based proc_data */
2783 pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
/* A tgid may open the device only once at a time. */
2786 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2787 "%s already opened\n",
2788 current->pid, current->tgid, current_fsuid(),
2791 goto err_unlock_free_utd;
/* GFP_ATOMIC: we are holding a spinlock (bh disabled). */
2794 new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2795 if (!new_pqd_entry) {
2796 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2797 "proc data alloc failed\n",
2798 current->pid, current->tgid, current_fsuid());
2800 goto err_unlock_free_utd;
2802 new_pqd_entry->pid = current->tgid;
2803 INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2804 new_pqd_entry->parent_tag_data = utd_entry;
2805 utd_entry->num_pqd++;
2807 proc_qtu_data_tree_insert(new_pqd_entry,
2808 &proc_qtu_data_tree);
2810 spin_unlock_bh(&uid_tag_data_tree_lock);
2811 DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2812 current_fsuid(), new_pqd_entry);
/* release() retrieves this pqd from file->private_data. */
2813 file->private_data = new_pqd_entry;
2816 err_unlock_free_utd:
/* Only undo the utd_entry if get_uid_data() created it just now. */
2817 if (!utd_entry_found) {
2818 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2822 spin_unlock_bh(&uid_tag_data_tree_lock);
/*
 * release() handler for /dev/xt_qtaguid: untag every socket the owning
 * process had tagged, then drop its proc_qtu_data and the per-uid
 * bookkeeping. Lock order: sock_tag_list_lock outside,
 * uid_tag_data_tree_lock inside (matches ctrl_cmd_untag above).
 *
 * NOTE(review): fragmentary extract — braces, the final return and a
 * few statements are not visible here.
 */
2826 static int qtudev_release(struct inode *inode, struct file *file)
2828 struct proc_qtu_data *pqd_entry = file->private_data;
2829 struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
2830 struct sock_tag *st_entry;
2831 struct rb_root st_to_free_tree = RB_ROOT;
2832 struct list_head *entry, *next;
2835 if (unlikely(qtu_proc_handling_passive))
2839 * Do not trust the current->pid, it might just be a kworker cleaning
2840 * up after a dead proc.
2842 DR_DEBUG("qtaguid: qtudev_release(): "
2843 "pid=%u tgid=%u uid=%u "
2844 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2845 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2846 pqd_entry, pqd_entry->pid, utd_entry,
2847 utd_entry->num_active_tags);
2849 spin_lock_bh(&sock_tag_list_lock);
2850 spin_lock_bh(&uid_tag_data_tree_lock);
/* Walk every socket this process tagged and tear each tag down. */
2852 list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2853 st_entry = list_entry(entry, struct sock_tag, list);
2854 DR_DEBUG("qtaguid: %s(): "
2855 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2857 st_entry, st_entry->sk,
2858 current->pid, current->tgid,
2859 pqd_entry->parent_tag_data->uid);
/* The tag's uid may differ from the opener's uid; re-resolve it. */
2861 utd_entry = uid_tag_data_tree_search(
2863 get_uid_from_tag(st_entry->tag));
2864 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2865 DR_DEBUG("qtaguid: %s(): "
2866 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2867 st_entry->tag, utd_entry);
2868 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2871 BUG_ON(tr->num_sock_tags <= 0);
2872 tr->num_sock_tags--;
2873 free_tag_ref_from_utd_entry(tr, utd_entry);
2875 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2876 list_del(&st_entry->list);
2877 /* Can't sockfd_put() within spinlock, do it later. */
2878 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2881 * Try to free the utd_entry if no other proc_qtu_data is
2882 * using it (num_pqd is 0) and it doesn't have active tags
2883 * (num_active_tags is 0).
2885 put_utd_entry(utd_entry);
/* Now drop this process' own pqd and its reference on the parent. */
2888 rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2889 BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2890 pqd_entry->parent_tag_data->num_pqd--;
2891 put_utd_entry(pqd_entry->parent_tag_data);
2893 file->private_data = NULL;
2895 spin_unlock_bh(&uid_tag_data_tree_lock);
2896 spin_unlock_bh(&sock_tag_list_lock);
/* Safe to sleep/sockfd_put now that all spinlocks are released. */
2899 sock_tag_tree_erase(&st_to_free_tree);
2901 prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2902 current->pid, current->tgid);
2906 /*------------------------------------------*/
/* File operations for the /dev/xt_qtaguid misc device (open/release only). */
2907 static const struct file_operations qtudev_fops = {
2908 .owner = THIS_MODULE,
2909 .open = qtudev_open,
2910 .release = qtudev_release,
/* Misc char device registered by qtaguid_mt_init(); minor assigned dynamically. */
2913 static struct miscdevice qtu_device = {
2914 .minor = MISC_DYNAMIC_MINOR,
2915 .name = QTU_DEV_NAME,
2916 .fops = &qtudev_fops,
2917 /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2920 /*------------------------------------------*/
/*
 * Create the proc/net/xt_qtaguid directory plus its "ctrl" and "stats"
 * entries, wiring their read/write handlers. Unwinds already-created
 * entries on failure (goto-based cleanup).
 *
 * NOTE(review): fragmentary extract — some error-path labels and
 * return statements are not visible here.
 */
2921 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2924 *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net)
2925 if (!*res_procdir) {
2926 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
/* Permissions come from the ctrl_perms / stats_perms module params. */
2931 xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2933 if (!xt_qtaguid_ctrl_file) {
2934 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2939 xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2940 xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2942 xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2944 if (!xt_qtaguid_stats_file) {
2945 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2948 goto no_stats_entry;
2950 xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2952 * TODO: add support counter hacking
2953 * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
/* Error unwind: remove entries in reverse creation order. */
2958 remove_proc_entry("ctrl", *res_procdir);
2960 remove_proc_entry("xt_qtaguid", NULL);
/*
 * xtables match registration. NFPROTO_UNSPEC lets the same match serve
 * both IPv4 and IPv6 tables.
 * NOTE(review): fragmentary extract — the .name/.revision initializers
 * are not visible here.
 */
2965 static struct xt_match qtaguid_mt_reg __read_mostly = {
2967 * This module masquerades as the "owner" module so that iptables
2968 * tools can deal with it.
2972 .family = NFPROTO_UNSPEC,
2973 .match = qtaguid_mt,
2974 .matchsize = sizeof(struct xt_qtaguid_match_info),
/*
 * Module init: set up procfs, iface stats, the xtables match and the
 * misc device. Short-circuit || means the first failing step stops the
 * sequence; the error branch body is not visible in this extract.
 */
2978 static int __init qtaguid_mt_init(void)
2980 if (qtaguid_proc_register(&xt_qtaguid_procdir)
2981 || iface_stat_init(xt_qtaguid_procdir)
2982 || xt_register_match(&qtaguid_mt_reg)
2983 || misc_register(&qtu_device))
2989 * TODO: allow unloading of the module.
2990 * For now stats are permanent.
2991 * Kconfig forces'y/n' and never an 'm'.
/* No module_exit(): the module is built-in only and never unloads. */
2994 module_init(qtaguid_mt_init);
2995 MODULE_AUTHOR("jpa <jpa@google.com>");
2996 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2997 MODULE_LICENSE("GPL");
/* Aliases so iptables "owner"/"qtaguid" match lookups autoload this module. */
2998 MODULE_ALIAS("ipt_owner");
2999 MODULE_ALIAS("ip6t_owner");
3000 MODULE_ALIAS("ipt_qtaguid");
3001 MODULE_ALIAS("ip6t_qtaguid");