1 package edu.uci.iotproject.analysis;
3 import edu.uci.iotproject.Conversation;
4 import edu.uci.iotproject.DnsMap;
5 import edu.uci.iotproject.util.PcapPacketUtils;
6 import org.pcap4j.core.PcapPacket;
7 import org.pcap4j.packet.IpV4Packet;
8 import org.pcap4j.packet.TcpPacket;
13 * Utility functions for analyzing and structuring (sets of) {@link Conversation}s.
15 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
16 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
18 public class TcpConversationUtils {
23 * Given a {@link Conversation}, extract its set of "packet pairs", i.e., pairs of request-reply packets.
26 * <b>Note:</b> in the current implementation, if one endpoint sends multiple packets back-to-back with no
27 * interleaved reply packets from the other endpoint, such packets are converted to one-item pairs (i.e., instances
28 * of {@lin PcapPacketPair} where {@link PcapPacketPair#getSecond()} is {@code null}).
30 * @param conv The {@code Conversation} for which packet pairs are to be extracted.
31 * @return The packet pairs extracted from {@code conv}.
33 public static List<PcapPacketPair> extractPacketPairs(Conversation conv) {
34 List<PcapPacket> packets = conv.getPackets();
35 List<PcapPacketPair> pairs = new ArrayList<>();
37 while (i < packets.size()) {
38 PcapPacket p1 = packets.get(i);
39 String p1SrcIp = p1.get(IpV4Packet.class).getHeader().getSrcAddr().getHostAddress();
40 int p1SrcPort = p1.get(TcpPacket.class).getHeader().getSrcPort().valueAsInt();
41 if (i+1 < packets.size()) {
42 PcapPacket p2 = packets.get(i+1);
43 if (PcapPacketUtils.isSource(p2, p1SrcIp, p1SrcPort)) {
44 // Two packets in a row going in the same direction -> create one item pair for p1
45 pairs.add(new PcapPacketPair(p1, null));
46 // Advance one packet as the following two packets may form a valid two-item pair.
49 // The two packets form a response-reply pair, create two-item pair.
50 pairs.add(new PcapPacketPair(p1, p2));
51 // Advance two packets as we have already processed the packet at index i+1 in order to create the pair.
55 // Last packet of conversation => one item pair
56 pairs.add(new PcapPacketPair(p1, null));
57 // Advance i to ensure termination.
62 // TODO: what if there is long time between response and reply packet? Should we add a threshold and exclude those cases?
66 * Given a collection of TCP conversations and associated DNS mappings, groups the conversations by hostname.
67 * @param tcpConversations The collection of TCP conversations.
68 * @param ipHostnameMappings The associated DNS mappings.
69 * @return A map where each key is a hostname and its associated value is a list of conversations where one of the
70 * two communicating hosts is that hostname (i.e. its IP maps to the hostname).
72 public static Map<String, List<Conversation>> groupConversationsByHostname(Collection<Conversation> tcpConversations, DnsMap ipHostnameMappings) {
73 HashMap<String, List<Conversation>> result = new HashMap<>();
74 for (Conversation c : tcpConversations) {
75 if (c.getPackets().size() == 0) {
76 String warningStr = String.format("Detected a %s [%s] with no payload packets.",
77 c.getClass().getSimpleName(), c.toString());
78 System.err.println(warningStr);
81 IpV4Packet firstPacketIp = c.getPackets().get(0).get(IpV4Packet.class);
82 String ipSrc = firstPacketIp.getHeader().getSrcAddr().getHostAddress();
83 String ipDst = firstPacketIp.getHeader().getDstAddr().getHostAddress();
84 // Check if src or dst IP is associated with one or more hostnames.
85 Set<String> hostnames = ipHostnameMappings.getHostnamesForIp(ipSrc);
86 if (hostnames == null) {
87 // No luck with src ip (possibly because it's a client->srv packet), try dst ip.
88 hostnames = ipHostnameMappings.getHostnamesForIp(ipDst);
90 if (hostnames != null) {
91 // Put a reference to the conversation for each of the hostnames that the conversation's IP maps to.
92 for (String hostname : hostnames) {
93 List<Conversation> newValue = new ArrayList<>();
95 result.merge(hostname, newValue, (l1, l2) -> { l1.addAll(l2); return l1; });
97 if (hostnames.size() > 1) {
98 // Print notice of IP mapping to multiple hostnames (debugging)
99 System.err.println(String.format("%s: encountered an IP that maps to multiple (%d) hostnames",
100 TcpConversationUtils.class.getSimpleName(), hostnames.size()));
103 // If no hostname mapping, store conversation under the key that is the concatenation of the two IPs.
104 // In order to ensure consistency when mapping conversations, use lexicographic order to select which IP
106 String delimiter = "_";
107 // Note that the in case the comparison returns 0, the strings are equal, so it doesn't matter which of
108 // ipSrc and ipDst go first (also, this case should not occur in practice as it means that the device is
109 // communicating with itself!)
110 String key = ipSrc.compareTo(ipDst) <= 0 ? ipSrc + delimiter + ipDst : ipDst + delimiter + ipSrc;
111 List<Conversation> newValue = new ArrayList<>();
113 result.merge(key, newValue, (l1, l2) -> { l1.addAll(l2); return l1; });
119 public static Map<String, Integer> countPacketSequenceFrequencies(Collection<Conversation> conversations) {
120 Map<String, Integer> result = new HashMap<>();
121 for (Conversation conv : conversations) {
122 if (conv.getPackets().size() == 0) {
123 // Skip conversations with no payload packets.
126 StringBuilder sb = new StringBuilder();
127 for (PcapPacket pp : conv.getPackets()) {
128 sb.append(pp.length() + " ");
130 result.merge(sb.toString(), 1, (i1, i2) -> i1+i2);
136 * Given a {@link Collection} of {@link Conversation}s, builds a {@link Map} from {@link String} to {@link List}
137 * of {@link Conversation}s such that each key is the <em>concatenation of the packet lengths of all payload packets
138 * (i.e., the set of packets returned by {@link Conversation#getPackets()}) separated by a delimiter</em> of any
139 * {@link Conversation} pointed to by that key. In other words, what the {@link Conversation}s {@code cs} pointed to
140 * by the key {@code s} have in common is that they all contain exactly the same number of payload packets <em>and
141 * </em> these payload packets are identical across all {@code Conversation}s in {@code convs} in terms of packet
142 * length and packet order. For example, if the key is "152 440 550", this means that every individual
143 * {@code Conversation} in the list of {@code Conversation}s pointed to by that key contain exactly three payload
144 * packet of lengths 152, 440, and 550, and these three packets are ordered the in the order prescribed by the key.
146 * @param conversations The collection of {@code Conversation}s to group by packet sequence.
147 * @return a {@link Map} from {@link String} to {@link List} of {@link Conversation}s such that each key is the
148 * <em>concatenation of the packet lengths of all payload packets (i.e., the set of packets returned by
149 * {@link Conversation#getPackets()}) separated by a delimiter</em> of any {@link Conversation} pointed to
152 public static Map<String, List<Conversation>> groupConversationsByPacketSequence(Collection<Conversation> conversations) {
153 Map<String, List<Conversation>> result = new HashMap<>();
154 for (Conversation conv : conversations) {
155 if (conv.getPackets().size() == 0) {
156 // Skip conversations with no payload packets.
159 StringBuilder sb = new StringBuilder();
160 for (PcapPacket pp : conv.getPackets()) {
161 if (sb.length() != 0) {
162 // only add a space if there's preceding content
165 sb.append(pp.length());
167 List<Conversation> oneItemList = new ArrayList<>();
168 oneItemList.add(conv);
169 result.merge(sb.toString(), oneItemList, (oldList, newList) -> {
170 oldList.addAll(newList);
178 * Given a {@link Conversation}, counts the frequencies of each unique packet length seen as part of the
179 * {@code Conversation}.
180 * @param c The {@code Conversation} for which unique packet length frequencies are to be determined.
181 * @return A mapping from packet length to its frequency.
183 public static Map<Integer, Integer> countPacketLengthFrequencies(Conversation c) {
184 Map<Integer, Integer> result = new HashMap<>();
185 for (PcapPacket packet : c.getPackets()) {
186 result.merge(packet.length(), 1, (i1, i2) -> i1 + i2);
192 * Like {@link #countPacketLengthFrequencies(Conversation)}, but counts packet length frequencies for a collection
193 * of {@code Conversation}s, i.e., the frequency of a packet length becomes the total number of packets with that
194 * length across <em>all</em> {@code Conversation}s in {@code conversations}.
195 * @param conversations The collection of {@code Conversation}s for which packet length frequencies are to be
197 * @return A mapping from packet length to its frequency.
199 public static Map<Integer, Integer> countPacketLengthFrequencies(Collection<Conversation> conversations) {
200 Map<Integer, Integer> result = new HashMap<>();
201 for (Conversation c : conversations) {
202 Map<Integer, Integer> intermediateResult = countPacketLengthFrequencies(c);
203 for (Map.Entry<Integer, Integer> entry : intermediateResult.entrySet()) {
204 result.merge(entry.getKey(), entry.getValue(), (i1, i2) -> i1 + i2);
210 public static Map<String, Integer> countPacketPairFrequencies(Collection<PcapPacketPair> pairs) {
211 Map<String, Integer> result = new HashMap<>();
212 for (PcapPacketPair ppp : pairs) {
213 result.merge(ppp.toString(), 1, (i1, i2) -> i1 + i2);
218 public static Map<String, Map<String, Integer>> countPacketPairFrequenciesByHostname(Collection<Conversation> tcpConversations, DnsMap ipHostnameMappings) {
219 Map<String, List<Conversation>> convsByHostname = groupConversationsByHostname(tcpConversations, ipHostnameMappings);
220 HashMap<String, Map<String, Integer>> result = new HashMap<>();
221 for (Map.Entry<String, List<Conversation>> entry : convsByHostname.entrySet()) {
222 // Merge all packet pairs exchanged during the course of all conversations with hostname into one list
223 List<PcapPacketPair> allPairsExchangedWithHostname = new ArrayList<>();
224 entry.getValue().forEach(conversation -> allPairsExchangedWithHostname.addAll(extractPacketPairs(conversation)));
225 // Then count the frequencies of packet pairs exchanged with the hostname, irrespective of individual
227 result.put(entry.getKey(), countPacketPairFrequencies(allPairsExchangedWithHostname));