Implementing relaxed matching for layer 2 and layer 3.
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / detection / layer3 / Layer3ClusterMatcher.java
1 package edu.uci.iotproject.detection.layer3;
2
3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
4 import edu.uci.iotproject.detection.AbstractClusterMatcher;
5 import edu.uci.iotproject.detection.ClusterMatcherObserver;
6 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
7 import edu.uci.iotproject.trafficreassembly.layer3.TcpReassembler;
8 import edu.uci.iotproject.analysis.TcpConversationUtils;
9 import edu.uci.iotproject.io.PcapHandleReader;
10 import edu.uci.iotproject.util.PrintUtils;
11 import org.pcap4j.core.*;
12
13 import java.time.ZoneId;
14 import java.util.*;
15 import java.util.stream.Collectors;
16
17 import static edu.uci.iotproject.util.PcapPacketUtils.*;
18
19 /**
20  * Searches a traffic trace for sequences of packets "belong to" a given cluster (in other words, attempts to classify
21  * traffic as pertaining to a given cluster).
22  *
23  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
24  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
25  */
26 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
27
28     /**
29      * The ordered directions of packets in the sequences that make up {@link #mCluster}.
30      */
31     private final Conversation.Direction[] mClusterMemberDirections;
32
33     /**
34      * For reassembling the observed traffic into TCP connections.
35      */
36     private final TcpReassembler mTcpReassembler;
37
38     /**
39      * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
40      */
41     private final String mRouterWanIp;
42
43     /**
44      * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
45      */
46     private final double mEps;
47
48     /**
49      * The packet inclusion time for signature.
50      */
51     private int mInclusionTimeMillis;
52
53     /**
54      * Relaxed matching
55      */
56     private int mDelta;
57     private Set<Integer> mPacketSet;
58
59     /**
60      * Create a {@link Layer3ClusterMatcher}.
61      * @param cluster The cluster that traffic is matched against.
62      * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
63      *                    determining the direction of packets).
64      * @param inclusionTimeMillis The packet inclusion time for signature.
65      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
66      * @param eps The epsilon value used in the DBSCAN algorithm.
67      * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
68      *                          (a subset of) the examined traffic is similar to the traffic that makes up
69      *                          {@code cluster}, i.e., when the examined traffic is classified as pertaining to
70      *                          {@code cluster}.
71      */
72     public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, int inclusionTimeMillis,
73                                 boolean isRangeBased, double eps, int delta, Set<Integer> packetSet,
74                                 ClusterMatcherObserver... detectionObservers) {
75         super(cluster, isRangeBased);
76         Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
77         for (ClusterMatcherObserver obs : detectionObservers) {
78             addObserver(obs);
79         }
80         // Build the cluster members' direction sequence.
81         // Note: assumes that the provided cluster was captured within the local network (routerWanIp is set to null).
82         mClusterMemberDirections = getPacketDirections(cluster.get(0), null);
83         /*
84          * Enforce restriction on cluster members: all representatives must exhibit the same direction pattern and
85          * contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled later
86          * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
87          * in order to ensure correctness, especially during the development/debugging phase.
88          */
89         if (!isRangeBased) {    // Only when it is not range-based
90             if (mCluster.stream().
91                     anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
92                 throw new IllegalArgumentException(
93                         "cluster members must contain the same number of packets and exhibit the same packet direction " +
94                                 "pattern"
95                 );
96             }
97         }
98         mEps = eps;
99         mRouterWanIp = routerWanIp;
100                                 mTcpReassembler = new TcpReassembler(mRouterWanIp);
101         mInclusionTimeMillis =
102                 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
103         mDelta = delta;
104         mPacketSet = packetSet;
105     }
106
107     @Override
108     public void gotPacket(PcapPacket packet) {
109         // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
110         mTcpReassembler.gotPacket(packet);
111     }
112
113     /**
114      * Get the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
115      * @return the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
116      */
117     public List<List<PcapPacket>> getCluster() {
118         return mCluster;
119     }
120
121     public void performDetectionRangeBased() {
122         /*
123          * Let's start out simple by building a version that only works for signatures that do not span across multiple
124          * TCP conversations...
125          */
126         for (Conversation c : mTcpReassembler.getTcpConversations()) {
127             if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
128                 // Skip empty conversations.
129                 continue;
130             }
131             List<PcapPacket> lowerBound = mCluster.get(0);
132             List<PcapPacket> upperBound = mCluster.get(1);
133             if (isTlsSequence(lowerBound) != c.isTls() || isTlsSequence(upperBound) != c.isTls()) {
134                 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
135                 continue;
136             }
137             // Fetch set of packets to examine based on TLS or not.
138             List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
139             Optional<List<PcapPacket>> match;
140             while ((match = findSubsequenceInSequence(lowerBound, upperBound, cPkts, mClusterMemberDirections, null)).
141                     isPresent()) {
142                 List<PcapPacket> matchSeq = match.get();
143                 // Notify observers about the match.
144                 // Max number of skipped packets in layer 3 is 0 (no skipped packets)
145                 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
146                 /*
147                  * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
148                  * signature sequence.
149                  */
150                 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
151                 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
152                 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
153             }
154         }
155     }
156
157     // TODO: Relaxed matching with delta is only applied to conservative matching for now
158     public void performDetectionConservative() {
159         /*
160          * Let's start out simple by building a version that only works for signatures that do not span across multiple
161          * TCP conversations...
162          */
163         for (Conversation c : mTcpReassembler.getTcpConversations()) {
164             if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
165                 // Skip empty conversations.
166                 continue;
167             }
168             for (List<PcapPacket> signatureSequence : mCluster) {
169                 if (isTlsSequence(signatureSequence) != c.isTls()) {
170                     // We consider it a mismatch if one is a TLS application data sequence and the other is not.
171                     continue;
172                 }
173                 // Fetch set of packets to examine based on TLS or not.
174                 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
175                 /*
176                  * Note: we embed the attempt to detect the signature sequence in a loop in order to capture those cases
177                  * where the same signature sequence appears multiple times in one Conversation.
178                  *
179                  * Note: since we expect all sequences that together make up the signature to exhibit the same direction
180                  * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
181                  * won't have to be recomputed internally in each call to findSubsequenceInSequence().
182                  */
183                 Optional<List<PcapPacket>> match;
184                 while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null, mDelta, mPacketSet)).
185                         isPresent()) {
186                     List<PcapPacket> matchSeq = match.get();
187                     // Notify observers about the match.
188                     // Max number of skipped packets in layer 3 is 0 (no skipped packets)
189                     mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
190                     /*
191                      * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
192                      * signature sequence.
193                      */
194                     int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
195                     // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
196                     cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
197                 }
198             }
199
200             /*
201              * TODO:
202              * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
203              * not manage to capture every single mutation of the sequence during training.
204              *
205              * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
206              * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
207              * variance?
208              */
209         }
210     }
211
212     /**
213      * Checks if {@code sequence} is a sequence of TLS packets. Note: the current implementation relies on inspection
214      * of the port numbers when deciding between TLS vs. non-TLS. Therefore, only the first packet of {@code sequence}
215      * is examined as it is assumed that all packets in {@code sequence} pertain to the same {@link Conversation} and
216      * hence share the same set of two src/dst port numbers (albeit possibly alternating between which one is the src
217      * and which one is the dst, as packets in {@code sequence} may be in alternating directions).
218      * @param sequence The sequence of packets for which it is to be determined if it is a sequence of TLS packets or
219      *                 non-TLS packets.
220      * @return {@code true} if {@code sequence} is a sequence of TLS packets, {@code false} otherwise.
221      */
222     private boolean isTlsSequence(List<PcapPacket> sequence) {
223         // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
224         PcapPacket firstPkt = sequence.get(0);
225         int srcPort = getSourcePort(firstPkt);
226         int dstPort = getDestinationPort(firstPkt);
227         return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
228     }
229
230     /**
231      * Examine if a given sequence of packets ({@code sequence}) contains a given shorter sequence of packets
232      * ({@code subsequence}). Note: the current implementation actually searches for a substring as it does not allow
233      * for interleaving packets in {@code sequence} that are not in {@code subsequence}; for example, if
234      * {@code subsequence} consists of packet lengths [2, 3, 5] and {@code sequence} consists of  packet lengths
235      * [2, 3, 4, 5], the result will be that there is no match (because of the interleaving 4). If we are to allow
236      * interleaving packets, we need a modified version of
237      * <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
238      *
239      * @param subsequence The sequence to search for.
240      * @param sequence The sequence to search.
241      * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
242      *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
243      *                              {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
244      *                              internally compute the packet directions.
245      * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
246      *                           {@code sequenceDirections[i]} is the direction of the packet returned by
247      *                           {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
248      *                           compute the packet directions.
249      *
250      * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
251      *         {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
252      */
253     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
254                                                                  List<PcapPacket> sequence,
255                                                                  Conversation.Direction[] subsequenceDirections,
256                                                                  Conversation.Direction[] sequenceDirections) {
257         if (sequence.size() < subsequence.size()) {
258             // If subsequence is longer, it cannot be contained in sequence.
259             return Optional.empty();
260         }
261         if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
262             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
263             return Optional.empty();
264         }
265         // If packet directions have not been precomputed by calling code, we need to construct them.
266         if (subsequenceDirections == null) {
267             subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
268         }
269         if (sequenceDirections == null) {
270             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
271         }
272         int subseqIdx = 0;
273         int seqIdx = 0;
274         while (seqIdx < sequence.size()) {
275             PcapPacket subseqPkt = subsequence.get(subseqIdx);
276             PcapPacket seqPkt = sequence.get(seqIdx);
277             // We only have a match if packet lengths and directions match.
278             if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
279                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
280                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
281                 subseqIdx++;
282                 seqIdx++;
283                 if (subseqIdx == subsequence.size()) {
284                     // We managed to match the entire subsequence in sequence.
285                     // Return the sublist of sequence that matches subsequence.
286                     /*
287                      * TODO:
288                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
289                      * for live traces!
290                      */
291                     return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
292                 }
293             } else {
294                 // Mismatch.
295                 if (subseqIdx > 0) {
296                     /*
297                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
298                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
299                      * leave seqIdx untouched.
300                      */
301                     subseqIdx = 0;
302                 } else {
303                     /*
304                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
305                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
306                      * sequence.
307                      */
308                     seqIdx++;
309                 }
310             }
311         }
312         return Optional.empty();
313     }
314
315     /**
316      * Overload the same method with relaxed matching.
317      *
318      * @param subsequence The sequence to search for.
319      * @param sequence The sequence to search.
320      * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
321      *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
322      *                              {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
323      *                              internally compute the packet directions.
324      * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
325      *                           {@code sequenceDirections[i]} is the direction of the packet returned by
326      *                           {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
327      *                           compute the packet directions.
328      * @param delta The delta for relaxed matching
329      * @param packetSet The set of unique packet lengths, whose matching is to be relaxed
330      *
331      * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
332      *         {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
333      */
334     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
335                                                                  List<PcapPacket> sequence,
336                                                                  Conversation.Direction[] subsequenceDirections,
337                                                                  Conversation.Direction[] sequenceDirections,
338                                                                  int delta,
339                                                                  Set<Integer> packetSet) {
340         if (sequence.size() < subsequence.size()) {
341             // If subsequence is longer, it cannot be contained in sequence.
342             return Optional.empty();
343         }
344         if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
345             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
346             return Optional.empty();
347         }
348         // If packet directions have not been precomputed by calling code, we need to construct them.
349         if (subsequenceDirections == null) {
350             subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
351         }
352         if (sequenceDirections == null) {
353             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
354         }
355         int subseqIdx = 0;
356         int seqIdx = 0;
357         while (seqIdx < sequence.size()) {
358             PcapPacket subseqPkt = subsequence.get(subseqIdx);
359             PcapPacket seqPkt = sequence.get(seqIdx);
360             // We only have a match if packet lengths and directions match.
361             // Do relaxed matching here if applicable
362             if ((delta > 0 && packetSet.contains(subseqPkt.getOriginalLength()) &&
363                     subseqPkt.getOriginalLength() - delta <= seqPkt.getOriginalLength() &&
364                     seqPkt.getOriginalLength() <= subseqPkt.getOriginalLength() + delta &&
365                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) ||
366                     // Or just exact matching
367                     (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
368                      subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx])) {
369                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
370                 subseqIdx++;
371                 seqIdx++;
372                 if (subseqIdx == subsequence.size()) {
373                     // We managed to match the entire subsequence in sequence.
374                     // Return the sublist of sequence that matches subsequence.
375                     /*
376                      * TODO:
377                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
378                      * for live traces!
379                      */
380                     return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
381                 }
382             } else {
383                 // Mismatch.
384                 if (subseqIdx > 0) {
385                     /*
386                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
387                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
388                      * leave seqIdx untouched.
389                      */
390                     subseqIdx = 0;
391                 } else {
392                     /*
393                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
394                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
395                      * sequence.
396                      */
397                     seqIdx++;
398                 }
399             }
400         }
401         return Optional.empty();
402     }
403
404     /**
405      * Overloading the method {@code findSubsequenceInSequence} for range-based matching. Instead of a sequence,
406      * we have sequences of lower and upper bounds.
407      *
408      * @param lowerBound The lower bound of the sequence we search for.
409      * @param upperBound The upper bound of the sequence we search for.
410      * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
411      *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
412      *                              {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
413      *                              internally compute the packet directions.
414      * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
415      *                           {@code sequenceDirections[i]} is the direction of the packet returned by
416      *                           {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
417      *                           compute the packet directions.
418      *
419      * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
420      *         {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
421      */
422     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
423                                                                  List<PcapPacket> upperBound,
424                                                                  List<PcapPacket> sequence,
425                                                                  Conversation.Direction[] subsequenceDirections,
426                                                                  Conversation.Direction[] sequenceDirections) {
427         // Just do the checks for either lower or upper bound!
428         // TODO: For now we use just the lower bound
429         if (sequence.size() < lowerBound.size()) {
430             // If subsequence is longer, it cannot be contained in sequence.
431             return Optional.empty();
432         }
433         if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
434             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
435             return Optional.empty();
436         }
437         // If packet directions have not been precomputed by calling code, we need to construct them.
438         if (subsequenceDirections == null) {
439             subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
440         }
441         if (sequenceDirections == null) {
442             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
443         }
444         int subseqIdx = 0;
445         int seqIdx = 0;
446         while (seqIdx < sequence.size()) {
447             PcapPacket lowBndPkt = lowerBound.get(subseqIdx);
448             PcapPacket upBndPkt = upperBound.get(subseqIdx);
449             PcapPacket seqPkt = sequence.get(seqIdx);
450             // We only have a match if packet lengths and directions match.
451             // The packet lengths have to be in the range of [lowerBound - eps, upperBound+eps]
452             // We initialize the lower and upper bounds first
453             int epsLowerBound = lowBndPkt.length();
454             int epsUpperBound = upBndPkt.length();
455             // Do strict matching if the lower and upper bounds are the same length
456             // Do range matching with eps otherwise
457             if (epsLowerBound != epsUpperBound) {
458                 // TODO: Maybe we could do better here for the double to integer conversion?
459                 epsLowerBound = epsLowerBound - (int) mEps;
460                 epsUpperBound = epsUpperBound + (int) mEps;
461             }
462             if (epsLowerBound <= seqPkt.getOriginalLength() &&
463                     seqPkt.getOriginalLength() <= epsUpperBound &&
464                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
465                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
466                 subseqIdx++;
467                 seqIdx++;
468                 if (subseqIdx == lowerBound.size()) {
469                     // We managed to match the entire subsequence in sequence.
470                     // Return the sublist of sequence that matches subsequence.
471                     /*
472                      * TODO:
473                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
474                      * for live traces!
475                      */
476                     return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
477                 }
478             } else {
479                 // Mismatch.
480                 if (subseqIdx > 0) {
481                     /*
482                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
483                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
484                      * leave seqIdx untouched.
485                      */
486                     subseqIdx = 0;
487                 } else {
488                     /*
489                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
490                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
491                      * sequence.
492                      */
493                     seqIdx++;
494                 }
495             }
496         }
497         return Optional.empty();
498     }
499
500     // TODO: EXPERIMENT WITH ONLY PACKET DIRECTION AND TIMING
501 //    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
502 //                                                                 List<PcapPacket> sequence,
503 //                                                                 Conversation.Direction[] subsequenceDirections,
504 //                                                                 Conversation.Direction[] sequenceDirections) {
505 //        if (sequence.size() < subsequence.size()) {
506 //            // If subsequence is longer, it cannot be contained in sequence.
507 //            return Optional.empty();
508 //        }
509 //        if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
510 //            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
511 //            return Optional.empty();
512 //        }
513 //        // If packet directions have not been precomputed by calling code, we need to construct them.
514 //        if (subsequenceDirections == null) {
515 //            subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
516 //        }
517 //        if (sequenceDirections == null) {
518 //            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
519 //        }
520 //        int subseqIdx = 0;
521 //        int seqIdx = 0;
522 //        while (subseqIdx < subsequence.size() && seqIdx < sequence.size()) {
523 //            // We only have a match if packet lengths and directions match.
524 //            if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
525 //                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
526 //                subseqIdx++;
527 //                seqIdx++;
528 //                if (subseqIdx == subsequence.size()) {
529 //                    // We managed to match the entire subsequence in sequence.
530 //                    // Return the sublist of sequence that matches subsequence.
531 //                    /*
532 //                     * TODO:
533 //                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
534 //                     * for live traces!
535 //                     */
536 //                    // TODO: ALSO CHECK TIMING CONSTRAINT
537 //                    PcapPacket firstPacket = sequence.get(seqIdx - subsequence.size());
538 //                    PcapPacket lastPacket = sequence.get(seqIdx-1);
539 //                    if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
540 //                        return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
541 //                    }
542 //                }
543 //            } else {
544 //                // Mismatch.
545 //                if (subseqIdx > 0) {
546 //                    /*
547 //                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
548 //                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
549 //                     * leave seqIdx untouched.
550 //                     */
551 //                    subseqIdx = 0;
552 //                } else {
553 //                    /*
554 //                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
555 //                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
556 //                     * sequence.
557 //                     */
558 //                    seqIdx++;
559 //                }
560 //            }
561 //        }
562 //        return Optional.empty();
563 //    }
564 //
565 //    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
566 //                                                                 List<PcapPacket> upperBound,
567 //                                                                 List<PcapPacket> sequence,
568 //                                                                 Conversation.Direction[] subsequenceDirections,
569 //                                                                 Conversation.Direction[] sequenceDirections) {
570 //        // Just do the checks for either lower or upper bound!
571 //        // TODO: For now we use just the lower bound
572 //        if (sequence.size() < lowerBound.size()) {
573 //            // If subsequence is longer, it cannot be contained in sequence.
574 //            return Optional.empty();
575 //        }
576 //        if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
577 //            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
578 //            return Optional.empty();
579 //        }
580 //        // If packet directions have not been precomputed by calling code, we need to construct them.
581 //        if (subsequenceDirections == null) {
582 //            subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
583 //        }
584 //        if (sequenceDirections == null) {
585 //            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
586 //        }
587 //        int subseqIdx = 0;
588 //        int seqIdx = 0;
589 //        while (subseqIdx < lowerBound.size() && seqIdx < sequence.size()) {
590 //            // TODO: ONLY MATCH PACKET DIRECTIONS
591 //            if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
592 //                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
593 //                subseqIdx++;
594 //                seqIdx++;
595 //                if (subseqIdx == lowerBound.size()) {
596 //                    // We managed to match the entire subsequence in sequence.
597 //                    // Return the sublist of sequence that matches subsequence.
598 //                    /*
599 //                     * TODO:
600 //                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
601 //                     * for live traces!
602 //                     */
603 //                    // TODO: ALSO CHECK TIMING CONSTRAINT
604 //                    PcapPacket firstPacket = sequence.get(seqIdx - lowerBound.size());
605 //                    PcapPacket lastPacket = sequence.get(seqIdx);
606 //                    if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
607 //                        return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
608 //                    }
609 //                }
610 //            } else {
611 //                // Mismatch.
612 //                if (subseqIdx > 0) {
613 //                    /*
614 //                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
615 //                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
616 //                     * leave seqIdx untouched.
617 //                     */
618 //                    subseqIdx = 0;
619 //                } else {
620 //                    /*
621 //                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
622 //                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
623 //                     * sequence.
624 //                     */
625 //                    seqIdx++;
626 //                }
627 //            }
628 //        }
629 //        return Optional.empty();
630 //    }
631
632     /**
633      * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
634      * members. Two cluster members are considered identical if their packets lengths and packet directions are
635      * identical. The resulting pruned cluster is unmodifiable (this applies to both the outermost list as well as the
636      * nested lists) in order to preserve its integrity when exposed to external code (e.g., through
637      * {@link #getCluster()}).
638      *
639      * @param cluster A cluster to prune.
640      * @return The resulting pruned cluster.
641      */
642     @Override
643     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
644         List<List<PcapPacket>> prunedCluster = new ArrayList<>();
645         for (List<PcapPacket> originalClusterSeq : cluster) {
646             boolean alreadyPresent = false;
647             for (List<PcapPacket> prunedClusterSeq : prunedCluster) {
648                 Optional<List<PcapPacket>> duplicate = findSubsequenceInSequence(originalClusterSeq, prunedClusterSeq,
649                         mClusterMemberDirections, mClusterMemberDirections);
650                 if (duplicate.isPresent()) {
651                     alreadyPresent = true;
652                     break;
653                 }
654             }
655             if (!alreadyPresent) {
656                 prunedCluster.add(Collections.unmodifiableList(originalClusterSeq));
657             }
658         }
659         return Collections.unmodifiableList(prunedCluster);
660     }
661
662     /**
663      * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
664      * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
665      * index in the input list.
666      * @param packets The list of packets for which to construct a corresponding array of packet directions.
667      * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
668      *                    the traffic is captured just outside the local network (at the ISP side of the router). Set to
669      *                    {@code null} if {@code packets} stem from traffic captured within the local network.
670      * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
671      *         corresponding index in {@code packets}.
672      */
673     private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
674         Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
675         for (int i = 0; i < packets.size(); i++) {
676             PcapPacket pkt = packets.get(i);
677             if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
678                 // Sanity check: we shouldn't be processing loopback traffic
679                 throw new AssertionError("loopback traffic detected");
680             }
681             if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
682                 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
683             } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
684                 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
685             } else {
686                 //throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");
687             }
688         }
689         return directions;
690     }
691
692 }