Adding the serializer/deserializer for the entire signature.
[pingpong.git] / Code / Projects / SmartPlugDetector / src / main / java / edu / uci / iotproject / detection / SignatureDetector.java
1 package edu.uci.iotproject.detection;
2
3 import edu.uci.iotproject.Conversation;
4 import edu.uci.iotproject.TcpReassembler;
5 import edu.uci.iotproject.analysis.TcpConversationUtils;
6 import edu.uci.iotproject.io.PcapHandleReader;
7 import edu.uci.iotproject.util.PrintUtils;
8 import org.pcap4j.core.*;
9
10 import java.time.ZoneId;
11 import java.util.*;
12 import java.util.stream.Collectors;
13
14 import static edu.uci.iotproject.util.PcapPacketUtils.*;
15
16 /**
17  * Detects occurrences of a packet-sequence signature in reassembled TCP traffic and notifies its observers.
18  *
19  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
20  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
21  */
22 public class SignatureDetector implements PacketListener {
23
24     // Test client
25     public static void main(String[] args) throws PcapNativeException, NotOpenException {
26
27 //        String path = "/scratch/July-2018"; // Rahmadi
28         String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
29         final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
30         final String signatureFile = path + "/2018-07/dlink/offSignature1.sig";
31
32         List<List<PcapPacket>> signature = PrintUtils.serializeClustersFromFile(signatureFile);
33         SignatureDetector signatureDetector = new SignatureDetector(signature, null,
34                 (sig, match) -> System.out.println(
35                         String.format("[ !!! SIGNATURE DETECTED AT %s !!! ]",
36                                 match.get(0).getTimestamp().atZone(ZoneId.of("America/Los_Angeles")))
37                 )
38         );
39
40         PcapHandle handle;
41         try {
42             handle = Pcaps.openOffline(inputPcapFile, PcapHandle.TimestampPrecision.NANO);
43         } catch (PcapNativeException pne) {
44             handle = Pcaps.openOffline(inputPcapFile);
45         }
46         PcapHandleReader reader = new PcapHandleReader(handle, p -> true, signatureDetector);
47         reader.readFromHandle();
48         signatureDetector.performDetection();
49     }
50
51     /**
52      * The signature that this {@link SignatureDetector} is trying to detect in the observed traffic.
53      */
54     private final List<List<PcapPacket>> mSignature;
55
56     /**
57      * The directions of packets in the sequences that make up {@link #mSignature}.
58      */
59     private final Conversation.Direction[] mSignatureDirections;
60
61     /**
62      * For reassembling the observed traffic into TCP connections.
63      */
64     private final TcpReassembler mTcpReassembler = new TcpReassembler();
65
66     /**
67      * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
68      */
69     private final String mRouterWanIp;
70
71     private final Observer[] mObservers;
72
73     public SignatureDetector(List<List<PcapPacket>> signature, String routerWanIp, Observer... detectionObservers) {
74         mSignature = Collections.unmodifiableList(Objects.requireNonNull(signature, "signature cannot be null"));
75         mObservers = Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
76         if (mSignature.isEmpty() || mSignature.stream().anyMatch(inner -> inner.isEmpty())) {
77             throw new IllegalArgumentException("signature is empty (or contains empty inner List)");
78         }
79         if (mObservers.length == 0) {
80             throw new IllegalArgumentException("no detectionObservers provided");
81         }
82         mRouterWanIp = routerWanIp;
83         // Build the signature's direction sequence.
84         // Note: assumes that the provided signature was captured within the local network (routerWanIp is set to null).
85         mSignatureDirections = getPacketDirections(mSignature.get(0), null);
86         /*
87          * Enforce restriction on cluster/signature members: all representatives must exhibit the same direction pattern
88          * and contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled
89          * later on in favor of performance. However, it is only run once (at instantiation), so the overhead may be
90          * warranted in order to ensure correctness, especially during the development/debugging phase.
91          */
92         if (mSignature.stream().
93                 anyMatch(inner -> !Arrays.equals(mSignatureDirections, getPacketDirections(inner, null)))) {
94             throw new IllegalArgumentException(
95                     "signature members must contain the same number of packets and exhibit the same packet direction " +
96                             "pattern"
97             );
98         }
99     }
100
101     @Override
102     public void gotPacket(PcapPacket packet) {
103         // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
104         mTcpReassembler.gotPacket(packet);
105     }
106
107
108 //    public void performDetection() {
109 //        // Let's start out simple by building a version that only works for signatures that do not span across multiple
110 //        // TCP conversations...
111 //        for (Conversation c : mTcpReassembler.getTcpConversations()) {
112 //            for (List<PcapPacket> sequence : mSignature) {
113 //                boolean matchFound = isSequenceInConversation(sequence, c);
114 //                if (matchFound) {
115 //                    for (Observer obs : mObservers) {
116 //                        obs.onSequenceDetected(sequence, c);
117 //                    }
118 //                    // Found signature in current conversation, so break inner loop and continue with next conversation.
119 //                    // TODO: signature can be present more than once in Conversation...
120 //                    break;
121 //                }
122 //            }
123 //            /*
124 //             * TODO:
125 //             * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
126 //             * not manage to capture every single mutation of the sequence during training.
127 //             *
128 //             * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
129 //             * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
130 //             * variance?
131 //             */
132 //        }
133 //    }
134
135
136     public void performDetection() {
137         /*
138          * Let's start out simple by building a version that only works for signatures that do not span across multiple
139          * TCP conversations...
140          */
141         for (Conversation c : mTcpReassembler.getTcpConversations()) {
142             if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
143                 // Skip empty conversations.
144                 continue;
145             }
146             for (List<PcapPacket> signatureSequence : mSignature) {
147                 if (isTlsSequence(signatureSequence) != c.isTls()) {
148                     // We consider it a mismatch if one is a TLS application data sequence and the other is not.
149                     continue;
150                 }
151                 // Fetch set of packets to examine based on TLS or not.
152                 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
153                 /*
154                  * Note: we embed the attempt to detect the signature sequence in a loop in order to capture those cases
155                  * where the same signature sequence appears multiple times in one Conversation.
156                  *
157                  * Note: as the cluster can be made up of identical sequences, we must keep track of whether we detected
158                  * a match and, if so, break the inner for-each loop in order to prevent raising an alarm for each
159                  * cluster-member (prevent duplicate detections of the same event). However, a negative side-effect of
160                  * this is that, in doing so, we will also skip searching for subsequent different cluster members in
161                  * the current conversation if the current cluster member is a match.
162                  *
163                  * Note: since we expect all sequences that together make up the signature to exhibit the same direction
164                  * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
165                  * won't have to be recomputed internally in each call to findSubsequenceInSequence().
166                  */
167                 Optional<List<PcapPacket>> match;
168                 boolean matchFound = false;
169                 while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mSignatureDirections, null)).
170                         isPresent()) {
171                     matchFound = true;
172                     List<PcapPacket> matchSeq = match.get();
173                     // Notify observers about the match.
174                     Arrays.stream(mObservers).forEach(o -> o.onSignatureDetected(mSignature, matchSeq));
175                     /*
176                      * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
177                      * signature sequence.
178                      */
179                     int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size()-1));
180                     // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
181                     cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
182                 }
183                 if (matchFound) {
184                     // Break inner for-each loop in order to avoid duplicate detection of same event (see comment above)
185                     break;
186                 }
187
188
189 //                match.ifPresent(ps -> Arrays.stream(mObservers).forEach(o -> o.onSignatureDetected(mSignature, ps)));
190 //                if (match.isPresent()) {
191 //                    /*
192 //                     * We found an element in the signature cluster that was present in conversation, so no need to scan
193 //                     * conversation for remaining members of signature cluster (in fact, we'd be getting duplicate
194 //                     * output in those cases where the cluster is made up of identical sequences if we did not stop the
195 //                     * search here).
196 //                     *
197 //                     * TODO:
198 //                     * How do we handle those cases where the conversation matches the signature more than once (for
199 //                     * example, the long-lived connections used for sending the trigger from the cloud)?
200 //                     */
201 //                    break;
202 //                }
203             }
204         }
205     }
206
207 //    /**
208 //     * Examine if a {@link Conversation} contains a given sequence of packets. Note: the current implementation actually
209 //     * searches for a substring as it does not allow for interleaved packets in {@code c} that are not in
210 //     * {@code sequence}; for example, if {@code sequence} consists of packet lengths [2, 3, 5] and {@code c} consists of
211 //     * packet lengths [2, 3, 4, 5], the result will be {@code false}. If we are to allow interleaved packets, we need
212 //     * a modified version of <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
213 //     * @param sequence The sequence to look for.
214 //     * @param c The {@link Conversation} to search for {@code sequence} in.
215 //     * @return {@code true} if {@code c} contains {@code sequence}, {@code false} otherwise.
216 //     */
217 //    private boolean isSequenceInConversation(List<PcapPacket> sequence, Conversation c) {
218 //        // TODO add offset argument to allow looking for sequence starting later in Conversation.
219 //        // The packets we match against differ depending on whether the signature is a TLS or non-TLS signature.
220 //        boolean tlsSequence = isTlsSequence(sequence);
221 //        if (tlsSequence && !c.isTls()) {
222 //            // If we're looking for a TLS signature and this conversation does not appear to be a TLS conversation, we
223 //            // are done. Note: this assumes that they do NOT start performing TLS on new ports that are not captured in
224 //            // Conversation.isTls()
225 //            return false;
226 //        }
227 //        // Based on TLS or non-TLS signature, fetch the corresponding list of packets to match against.
228 //        List<PcapPacket> packets = tlsSequence ? c.getTlsApplicationDataPackets() : c.getPackets();
229 //        // If sequence is longer than the conversation, it can obviously not be contained in the conversation.
230 //        if (packets.size() < sequence.size()) {
231 //            return false;
232 //        }
233 //        /*
234 //         * Generate packet direction array for c. We have already generated the packet direction array for sequence as
235 //         * part of the constructor (mSignatureDirections).
236 //         */
237 //        Conversation.Direction[] cDirections = getPacketDirections(packets, mRouterWanIp);
238 //        int seqIdx = 0;
239 //        int convIdx = 0;
240 //        while (convIdx < packets.size()) {
241 //            PcapPacket seqPkt = sequence.get(seqIdx);
242 //            PcapPacket convPkt = packets.get(convIdx);
243 //            // We only have a match if packet lengths and directions match.
244 //            if (convPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
245 //                    mSignatureDirections[seqIdx] == cDirections[convIdx]) {
246 //                // A match, advance both indices to consider next packet in sequence vs. next packet in conversation
247 //                seqIdx++;
248 //                convIdx++;
249 //                if (seqIdx == sequence.size()) {
250 //                    // we managed to match the full sequence in the conversation.
251 //                    return true;
252 //                }
253 //            } else {
254 //                // Mismatch.
255 //                if (seqIdx > 0) {
256 //                    /*
257 //                     * If we managed to match parts of sequence, we restart the search for sequence in c at the index of
258 //                     * c where the current mismatch occurred. I.e., we must reset seqIdx, but leave convIdx untouched.
259 //                     */
260 //                    seqIdx = 0;
261 //                } else {
262 //                    /*
263 //                     * First packet of sequence didn't match packet at convIdx of conversation, so we move forward in
264 //                     * conversation, i.e., we continue the search for sequence in c starting at index convIdx+1 of c.
265 //                     */
266 //                    convIdx++;
267 //                }
268 //            }
269 //        }
270 //        return false;
271 //    }
272
273     private boolean isTlsSequence(List<PcapPacket> sequence) {
274         // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
275         PcapPacket firstPkt = sequence.get(0);
276         int srcPort = getSourcePort(firstPkt);
277         int dstPort = getDestinationPort(firstPkt);
278         return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
279     }
280
281 //    private List<PcapPacket> findeSequenceInConversation(List<PcapPacket> sequence, Conversation conv, int offset) {
282 //        if (isTlsSequence(sequence) != conv.isTls()) {
283 //            // We consider it a mismatch if one is a TLS Application Data sequence and the other is not.
284 //            return null;
285 //        }
286 //        List<PcapPacket> convPackets = conv.isTls() ? conv.getTlsApplicationDataPackets() : conv.getPackets();
287 //
288 //    }
289
290     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
291                                                                  List<PcapPacket> sequence,
292                                                                  Conversation.Direction[] subsequenceDirections,
293                                                                  Conversation.Direction[] sequenceDirections) {
294         if (sequence.size() < subsequence.size()) {
295             // If subsequence is longer, it cannot be contained in sequence.
296             return Optional.empty();
297         }
298         if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
299             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
300             return Optional.empty();
301         }
302         // If packet directions have not been precomputed by calling code, we need to construct them.
303         if (subsequenceDirections == null) {
304             subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
305         }
306         if (sequenceDirections == null) {
307             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
308         }
309         int subseqIdx = 0;
310         int seqIdx = 0;
311         while (seqIdx < sequence.size()) {
312             PcapPacket subseqPkt = subsequence.get(subseqIdx);
313             PcapPacket seqPkt = sequence.get(seqIdx);
314             // We only have a match if packet lengths and directions match.
315             if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
316                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
317                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
318                 subseqIdx++;
319                 seqIdx++;
320                 if (subseqIdx == subsequence.size()) {
321                     // We managed to match the entire subsequence in sequence.
322                     // Return the sublist of sequence that matches subsequence.
323                     /*
324                      * TODO:
325                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
326                      * for live traces!
327                      */
328                     return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
329                 }
330             } else {
331                 // Mismatch.
332                 if (subseqIdx > 0) {
333                     /*
334                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
335                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
336                      * leave seqIdx untouched.
337                      */
338                     subseqIdx = 0;
339                 } else {
340                     /*
341                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
342                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
343                      * sequence.
344                      */
345                     seqIdx++;
346                 }
347             }
348         }
349         return Optional.empty();
350     }
351
352     /**
353      * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
354      * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
355      * index in the input list.
356      * @param packets The list of packets for which to construct a corresponding array of packet directions.
357      * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
358      *                    the traffic is captured just outside the local network (at the ISP side of the router). Set to
359      *                    {@code null} if {@code packets} stem from traffic captured within the local network.
360      * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
361      *         corresponding index in {@code packets}.
362      */
363     private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
364         Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
365         for (int i = 0; i < packets.size(); i++) {
366             PcapPacket pkt = packets.get(i);
367             if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
368                 // Sanity check: we shouldn't be processing loopback traffic
369                 throw new AssertionError("loopback traffic detected");
370             }
371             if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
372                 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
373             } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
374                 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
375             } else {
376                 throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");
377             }
378         }
379         return directions;
380     }
381
382     interface Observer {
383 //        /**
384 //         * Callback that is invoked when a sequence associated with the signature/cluster (i.e., the sequence is a
385 //         * member of the cluster that makes up the signature) is detected in a {@link Conversation}.
386 //         * @param sequence The sequence that was detected in {@code conversation}.
387 //         * @param conversation The {@link Conversation} that {@code sequence} was detected in.
388 //         */
389 //        void onSequenceDetected(List<PcapPacket> sequence, Conversation conversation);
390
391         void onSignatureDetected(List<List<PcapPacket>> signature, List<PcapPacket> match);
392     }
393
394 }