backing up first "working" version of SignatureDetector; note that this version does...
[pingpong.git] / Code / Projects / SmartPlugDetector / src / main / java / edu / uci / iotproject / detection / SignatureDetector.java
1 package edu.uci.iotproject.detection;
2
3 import edu.uci.iotproject.Conversation;
4 import edu.uci.iotproject.TcpReassembler;
5 import edu.uci.iotproject.analysis.TcpConversationUtils;
6 import edu.uci.iotproject.io.PcapHandleReader;
7 import edu.uci.iotproject.util.PrintUtils;
8 import org.pcap4j.core.*;
9
10 import java.util.*;
11
12 import static edu.uci.iotproject.util.PcapPacketUtils.*;
13
14 /**
15  * TODO add class documentation.
16  *
17  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
18  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
19  */
20 public class SignatureDetector implements PacketListener {
21
22     public static void main(String[] args) throws PcapNativeException, NotOpenException {
23         // Test client
24 //        String path = "/scratch/July-2018"; // Rahmadi
25         String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
26         final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
27         final String signatureFile = path + "/2018-07/dlink/offSignature1.sig";
28 //        final String outputPcapFile = path + "/2018-07/dlink/dlink-processed.pcap";
29 //        final String triggerTimesFile = path + "/2018-07/dlink/dlink-july-26-2018.timestamps";
30 //        final String deviceIp = "192.168.1.199"; // .246 == phone; .199 == dlink plug?
31
32         List<List<PcapPacket>> signature = PrintUtils.serializeClustersFromFile(signatureFile);
33         SignatureDetector signatureDetector = new SignatureDetector(signature, null,
34                 (sig, match) -> System.out.println(
35                         String.format("[ !!! SIGNATURE DETECTED AT %s !!! ]", match.get(0).getTimestamp().toString())
36                 )
37         );
38
39
40         PcapHandle handle;
41         try {
42             handle = Pcaps.openOffline(inputPcapFile, PcapHandle.TimestampPrecision.NANO);
43         } catch (PcapNativeException pne) {
44             handle = Pcaps.openOffline(inputPcapFile);
45         }
46         PcapHandleReader reader = new PcapHandleReader(handle, p -> true, signatureDetector);
47         reader.readFromHandle();
48         signatureDetector.performDetection();
49     }
50
51     /**
52      * The signature that this {@link SignatureDetector} is trying to detect in the observed traffic.
53      */
54     private final List<List<PcapPacket>> mSignature;
55
56     /**
57      * The directions of packets in the sequences that make up {@link #mSignature}.
58      */
59     private final Conversation.Direction[] mSignatureDirections;
60
61     /**
62      * For reassembling the observed traffic into TCP connections.
63      */
64     private final TcpReassembler mTcpReassembler = new TcpReassembler();
65
66     /**
67      * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
68      */
69     private final String mRouterWanIp;
70
71     private final Observer[] mObservers;
72
73     public SignatureDetector(List<List<PcapPacket>> signature, String routerWanIp, Observer... detectionObservers) {
74         mSignature = Collections.unmodifiableList(Objects.requireNonNull(signature, "signature cannot be null"));
75         mObservers = Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
76         if (mSignature.isEmpty() || mSignature.stream().anyMatch(inner -> inner.isEmpty())) {
77             throw new IllegalArgumentException("signature is empty (or contains empty inner List)");
78         }
79         if (mObservers.length == 0) {
80             throw new IllegalArgumentException("no detectionObservers provided");
81         }
82         mRouterWanIp = routerWanIp;
83         // Build the signature's direction sequence.
84         // Note: assumes that the provided signature was captured within the local network (routerWanIp is set to null).
85         mSignatureDirections = getPacketDirections(mSignature.get(0), null);
86         /*
87          * Enforce restriction on cluster/signature members: all representatives must exhibit the same direction pattern
88          * and contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled
89          * later on in favor of performance. However, it is only run once (at instantiation), so the overhead may be
90          * warranted in order to ensure correctness, especially during the development/debugging phase.
91          */
92         if (mSignature.stream().
93                 anyMatch(inner -> !Arrays.equals(mSignatureDirections, getPacketDirections(inner, null)))) {
94             throw new IllegalArgumentException(
95                     "signature members must contain the same number of packets and exhibit the same packet direction " +
96                             "pattern"
97             );
98         }
99     }
100
101     @Override
102     public void gotPacket(PcapPacket packet) {
103         // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
104         mTcpReassembler.gotPacket(packet);
105     }
106
107
108 //    public void performDetection() {
109 //        // Let's start out simple by building a version that only works for signatures that do not span across multiple
110 //        // TCP conversations...
111 //        for (Conversation c : mTcpReassembler.getTcpConversations()) {
112 //            for (List<PcapPacket> sequence : mSignature) {
113 //                boolean matchFound = isSequenceInConversation(sequence, c);
114 //                if (matchFound) {
115 //                    for (Observer obs : mObservers) {
116 //                        obs.onSequenceDetected(sequence, c);
117 //                    }
118 //                    // Found signature in current conversation, so break inner loop and continue with next conversation.
119 //                    // TODO: signature can be present more than once in Conversation...
120 //                    break;
121 //                }
122 //            }
123 //            /*
124 //             * TODO:
125 //             * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
126 //             * not manage to capture every single mutation of the sequence during training.
127 //             *
128 //             * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
129 //             * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
130 //             * variance?
131 //             */
132 //        }
133 //    }
134
135
136     public void performDetection() {
137         /*
138          * Let's start out simple by building a version that only works for signatures that do not span across multiple
139          * TCP conversations...
140          */
141         for (Conversation c : mTcpReassembler.getTcpConversations()) {
142             if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
143                 // Skip empty conversations.
144                 continue;
145             }
146             for (List<PcapPacket> signatureSequence : mSignature) {
147                 if (isTlsSequence(signatureSequence) != c.isTls()) {
148                     // We consider it a mismatch if one is a TLS application data sequence and the other is not.
149                     continue;
150                 }
151                 // Fetch set of packets to examine based on TLS or not.
152                 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
153                 /*
154                  * Note: since we expect all sequences that together make up the signature to exhibit the same direction
155                  * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
156                  * won't have to be recomputed internally in each call to findSubsequenceInSequence().
157                  */
158                 Optional<List<PcapPacket>> match =
159                         findSubsequenceInSequence(signatureSequence, cPkts, mSignatureDirections, null);
160                 match.ifPresent(ps -> Arrays.stream(mObservers).forEach(o -> o.onSignatureDetected(mSignature, ps)));
161                 if (match.isPresent()) {
162                     /*
163                      * We found an element in the signature cluster that was present in conversation, so no need to scan
164                      * conversation for remaining members of signature cluster (in fact, we'd be getting duplicate
165                      * output in those cases where the cluster is made up of identical sequences if we did not stop the
166                      * search here).
167                      *
168                      * TODO:
169                      * How do we handle those cases where the conversation matches the signature more than once (for
170                      * example, the long-lived connections used for sending the trigger from the cloud)?
171                      */
172                     break;
173                 }
174             }
175         }
176     }
177
178 //    /**
179 //     * Examine if a {@link Conversation} contains a given sequence of packets. Note: the current implementation actually
180 //     * searches for a substring as it does not allow for interleaved packets in {@code c} that are not in
181 //     * {@code sequence}; for example, if {@code sequence} consists of packet lengths [2, 3, 5] and {@code c} consists of
182 //     * packet lengths [2, 3, 4, 5], the result will be {@code false}. If we are to allow interleaved packets, we need
183 //     * a modified version of <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
184 //     * @param sequence The sequence to look for.
185 //     * @param c The {@link Conversation} to search for {@code sequence} in.
186 //     * @return {@code true} if {@code c} contains {@code sequence}, {@code false} otherwise.
187 //     */
188 //    private boolean isSequenceInConversation(List<PcapPacket> sequence, Conversation c) {
189 //        // TODO add offset argument to allow looking for sequence starting later in Conversation.
190 //        // The packets we match against differ depending on whether the signature is a TLS or non-TLS signature.
191 //        boolean tlsSequence = isTlsSequence(sequence);
192 //        if (tlsSequence && !c.isTls()) {
193 //            // If we're looking for a TLS signature and this conversation does not appear to be a TLS conversation, we
194 //            // are done. Note: this assumes that they do NOT start performing TLS on new ports that are not captured in
195 //            // Conversation.isTls()
196 //            return false;
197 //        }
198 //        // Based on TLS or non-TLS signature, fetch the corresponding list of packets to match against.
199 //        List<PcapPacket> packets = tlsSequence ? c.getTlsApplicationDataPackets() : c.getPackets();
200 //        // If sequence is longer than the conversation, it can obviously not be contained in the conversation.
201 //        if (packets.size() < sequence.size()) {
202 //            return false;
203 //        }
204 //        /*
205 //         * Generate packet direction array for c. We have already generated the packet direction array for sequence as
206 //         * part of the constructor (mSignatureDirections).
207 //         */
208 //        Conversation.Direction[] cDirections = getPacketDirections(packets, mRouterWanIp);
209 //        int seqIdx = 0;
210 //        int convIdx = 0;
211 //        while (convIdx < packets.size()) {
212 //            PcapPacket seqPkt = sequence.get(seqIdx);
213 //            PcapPacket convPkt = packets.get(convIdx);
214 //            // We only have a match if packet lengths and directions match.
215 //            if (convPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
216 //                    mSignatureDirections[seqIdx] == cDirections[convIdx]) {
217 //                // A match, advance both indices to consider next packet in sequence vs. next packet in conversation
218 //                seqIdx++;
219 //                convIdx++;
220 //                if (seqIdx == sequence.size()) {
221 //                    // we managed to match the full sequence in the conversation.
222 //                    return true;
223 //                }
224 //            } else {
225 //                // Mismatch.
226 //                if (seqIdx > 0) {
227 //                    /*
228 //                     * If we managed to match parts of sequence, we restart the search for sequence in c at the index of
229 //                     * c where the current mismatch occurred. I.e., we must reset seqIdx, but leave convIdx untouched.
230 //                     */
231 //                    seqIdx = 0;
232 //                } else {
233 //                    /*
234 //                     * First packet of sequence didn't match packet at convIdx of conversation, so we move forward in
235 //                     * conversation, i.e., we continue the search for sequence in c starting at index convIdx+1 of c.
236 //                     */
237 //                    convIdx++;
238 //                }
239 //            }
240 //        }
241 //        return false;
242 //    }
243
244     private boolean isTlsSequence(List<PcapPacket> sequence) {
245         // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
246         PcapPacket firstPkt = sequence.get(0);
247         int srcPort = getSourcePort(firstPkt);
248         int dstPort = getDestinationPort(firstPkt);
249         return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
250     }
251
252 //    private List<PcapPacket> findeSequenceInConversation(List<PcapPacket> sequence, Conversation conv, int offset) {
253 //        if (isTlsSequence(sequence) != conv.isTls()) {
254 //            // We consider it a mismatch if one is a TLS Application Data sequence and the other is not.
255 //            return null;
256 //        }
257 //        List<PcapPacket> convPackets = conv.isTls() ? conv.getTlsApplicationDataPackets() : conv.getPackets();
258 //
259 //    }
260
261     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
262                                                                  List<PcapPacket> sequence,
263                                                                  Conversation.Direction[] subsequenceDirections,
264                                                                  Conversation.Direction[] sequenceDirections) {
265         if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
266             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
267             return Optional.empty();
268         }
269         if (sequence.size() < subsequence.size()) {
270             // If subsequence is longer, it cannot be contained in sequence.
271             return Optional.empty();
272         }
273         // If packet directions have not been precomputed by calling code, we need to construct them.
274         if (subsequenceDirections == null) {
275             subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
276         }
277         if (sequenceDirections == null) {
278             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
279         }
280         int subseqIdx = 0;
281         int seqIdx = 0;
282         while (seqIdx < sequence.size()) {
283             PcapPacket subseqPkt = subsequence.get(subseqIdx);
284             PcapPacket seqPkt = sequence.get(seqIdx);
285             // We only have a match if packet lengths and directions match.
286             if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
287                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
288                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
289                 subseqIdx++;
290                 seqIdx++;
291                 if (subseqIdx == subsequence.size()) {
292                     // We managed to match the entire subsequence in sequence.
293                     // Return the sublist of sequence that matches subsequence.
294                     /*
295                      * TODO:
296                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
297                      * for live traces!
298                      */
299                     return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
300                 }
301             } else {
302                 // Mismatch.
303                 if (subseqIdx > 0) {
304                     /*
305                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
306                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
307                      * leave seqIdx untouched.
308                      */
309                     subseqIdx = 0;
310                 } else {
311                     /*
312                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
313                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
314                      * sequence.
315                      */
316                     seqIdx++;
317                 }
318             }
319         }
320         return Optional.empty();
321     }
322
323     /**
324      * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
325      * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
326      * index in the input list.
327      * @param packets The list of packets for which to construct a corresponding array of packet directions.
328      * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
329      *                    the traffic is captured just outside the local network (at the ISP side of the router). Set to
330      *                    {@code null} if {@code packets} stem from traffic captured within the local network.
331      * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
332      *         corresponding index in {@code packets}.
333      */
334     private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
335         Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
336         for (int i = 0; i < packets.size(); i++) {
337             PcapPacket pkt = packets.get(i);
338             if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
339                 // Sanity check: we shouldn't be processing loopback traffic
340                 throw new AssertionError("loopback traffic detected");
341             }
342             if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
343                 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
344             } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
345                 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
346             } else {
347                 throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");
348             }
349         }
350         return directions;
351     }
352
353     interface Observer {
354 //        /**
355 //         * Callback that is invoked when a sequence associated with the signature/cluster (i.e., the sequence is a
356 //         * member of the cluster that makes up the signature) is detected in a {@link Conversation}.
357 //         * @param sequence The sequence that was detected in {@code conversation}.
358 //         * @param conversation The {@link Conversation} that {@code sequence} was detected in.
359 //         */
360 //        void onSequenceDetected(List<PcapPacket> sequence, Conversation conversation);
361
362         void onSignatureDetected(List<List<PcapPacket>> signature, List<PcapPacket> match);
363     }
364
365 }