Fixing a bug: TcpReassembler needs to use the right router WAN's IP that should be...
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / detection / layer3 / Layer3ClusterMatcher.java
1 package edu.uci.iotproject.detection.layer3;
2
3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
4 import edu.uci.iotproject.detection.AbstractClusterMatcher;
5 import edu.uci.iotproject.detection.ClusterMatcherObserver;
6 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
7 import edu.uci.iotproject.trafficreassembly.layer3.TcpReassembler;
8 import edu.uci.iotproject.analysis.TcpConversationUtils;
9 import edu.uci.iotproject.io.PcapHandleReader;
10 import edu.uci.iotproject.util.PrintUtils;
11 import org.pcap4j.core.*;
12
13 import java.time.ZoneId;
14 import java.util.*;
15 import java.util.stream.Collectors;
16
17 import static edu.uci.iotproject.util.PcapPacketUtils.*;
18
19 /**
20  * Searches a traffic trace for sequences of packets "belong to" a given cluster (in other words, attempts to classify
21  * traffic as pertaining to a given cluster).
22  *
23  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
24  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
25  */
26 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
27
28     /**
29      * The ordered directions of packets in the sequences that make up {@link #mCluster}.
30      */
31     private final Conversation.Direction[] mClusterMemberDirections;
32
33     /**
34      * For reassembling the observed traffic into TCP connections.
35      */
36     private final TcpReassembler mTcpReassembler;
37
38     /**
39      * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
40      */
41     private final String mRouterWanIp;
42
43     /**
44      * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
45      */
46     private final double mEps;
47
48     /**
49      * The packet inclusion time for signature.
50      */
51     private int mInclusionTimeMillis;
52
53     /**
54      * Create a {@link Layer3ClusterMatcher}.
55      * @param cluster The cluster that traffic is matched against.
56      * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
57      *                    determining the direction of packets).
58      * @param inclusionTimeMillis The packet inclusion time for signature.
59      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
60      * @param eps The epsilon value used in the DBSCAN algorithm.
61      * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
62      *                          (a subset of) the examined traffic is similar to the traffic that makes up
63      *                          {@code cluster}, i.e., when the examined traffic is classified as pertaining to
64      *                          {@code cluster}.
65      */
66     public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, int inclusionTimeMillis,
67                                 boolean isRangeBased, double eps,
68                                 ClusterMatcherObserver... detectionObservers) {
69         super(cluster, isRangeBased);
70         Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
71         for (ClusterMatcherObserver obs : detectionObservers) {
72             addObserver(obs);
73         }
74         // Build the cluster members' direction sequence.
75         // Note: assumes that the provided cluster was captured within the local network (routerWanIp is set to null).
76         mClusterMemberDirections = getPacketDirections(cluster.get(0), null);
77         /*
78          * Enforce restriction on cluster members: all representatives must exhibit the same direction pattern and
79          * contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled later
80          * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
81          * in order to ensure correctness, especially during the development/debugging phase.
82          */
83         if (!isRangeBased) {    // Only when it is not range-based
84             if (mCluster.stream().
85                     anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
86                 throw new IllegalArgumentException(
87                         "cluster members must contain the same number of packets and exhibit the same packet direction " +
88                                 "pattern"
89                 );
90             }
91         }
92         mEps = eps;
93         mRouterWanIp = routerWanIp;
94                                 mTcpReassembler = new TcpReassembler(mRouterWanIp);
95         mInclusionTimeMillis =
96                 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
97     }
98
99     @Override
100     public void gotPacket(PcapPacket packet) {
101         // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
102         mTcpReassembler.gotPacket(packet);
103     }
104
105     /**
106      * Get the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
107      * @return the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
108      */
109     public List<List<PcapPacket>> getCluster() {
110         return mCluster;
111     }
112
113     public void performDetectionRangeBased() {
114         /*
115          * Let's start out simple by building a version that only works for signatures that do not span across multiple
116          * TCP conversations...
117          */
118         for (Conversation c : mTcpReassembler.getTcpConversations()) {
119             if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
120                 // Skip empty conversations.
121                 continue;
122             }
123             List<PcapPacket> lowerBound = mCluster.get(0);
124             List<PcapPacket> upperBound = mCluster.get(1);
125             if (isTlsSequence(lowerBound) != c.isTls() || isTlsSequence(upperBound) != c.isTls()) {
126                 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
127                 continue;
128             }
129             // Fetch set of packets to examine based on TLS or not.
130             List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
131             Optional<List<PcapPacket>> match;
132             while ((match = findSubsequenceInSequence(lowerBound, upperBound, cPkts, mClusterMemberDirections, null)).
133                     isPresent()) {
134                 List<PcapPacket> matchSeq = match.get();
135                 // Notify observers about the match.
136                 // Max number of skipped packets in layer 3 is 0 (no skipped packets)
137                 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
138                 /*
139                  * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
140                  * signature sequence.
141                  */
142                 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
143                 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
144                 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
145             }
146         }
147     }
148
149     public void performDetectionConservative() {
150         /*
151          * Let's start out simple by building a version that only works for signatures that do not span across multiple
152          * TCP conversations...
153          */
154         for (Conversation c : mTcpReassembler.getTcpConversations()) {
155             if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
156                 // Skip empty conversations.
157                 continue;
158             }
159             for (List<PcapPacket> signatureSequence : mCluster) {
160                 if (isTlsSequence(signatureSequence) != c.isTls()) {
161                     // We consider it a mismatch if one is a TLS application data sequence and the other is not.
162                     continue;
163                 }
164                 // Fetch set of packets to examine based on TLS or not.
165                 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
166                 /*
167                  * Note: we embed the attempt to detect the signature sequence in a loop in order to capture those cases
168                  * where the same signature sequence appears multiple times in one Conversation.
169                  *
170                  * Note: since we expect all sequences that together make up the signature to exhibit the same direction
171                  * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
172                  * won't have to be recomputed internally in each call to findSubsequenceInSequence().
173                  */
174                 Optional<List<PcapPacket>> match;
175                 while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null)).
176                         isPresent()) {
177                     List<PcapPacket> matchSeq = match.get();
178                     // Notify observers about the match.
179                     // Max number of skipped packets in layer 3 is 0 (no skipped packets)
180                     mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
181                     /*
182                      * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
183                      * signature sequence.
184                      */
185                     int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
186                     // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
187                     cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
188                 }
189             }
190
191             /*
192              * TODO:
193              * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
194              * not manage to capture every single mutation of the sequence during training.
195              *
196              * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
197              * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
198              * variance?
199              */
200         }
201     }
202
203     /**
204      * Checks if {@code sequence} is a sequence of TLS packets. Note: the current implementation relies on inspection
205      * of the port numbers when deciding between TLS vs. non-TLS. Therefore, only the first packet of {@code sequence}
206      * is examined as it is assumed that all packets in {@code sequence} pertain to the same {@link Conversation} and
207      * hence share the same set of two src/dst port numbers (albeit possibly alternating between which one is the src
208      * and which one is the dst, as packets in {@code sequence} may be in alternating directions).
209      * @param sequence The sequence of packets for which it is to be determined if it is a sequence of TLS packets or
210      *                 non-TLS packets.
211      * @return {@code true} if {@code sequence} is a sequence of TLS packets, {@code false} otherwise.
212      */
213     private boolean isTlsSequence(List<PcapPacket> sequence) {
214         // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
215         PcapPacket firstPkt = sequence.get(0);
216         int srcPort = getSourcePort(firstPkt);
217         int dstPort = getDestinationPort(firstPkt);
218         return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
219     }
220
221     /**
222      * Examine if a given sequence of packets ({@code sequence}) contains a given shorter sequence of packets
223      * ({@code subsequence}). Note: the current implementation actually searches for a substring as it does not allow
224      * for interleaving packets in {@code sequence} that are not in {@code subsequence}; for example, if
225      * {@code subsequence} consists of packet lengths [2, 3, 5] and {@code sequence} consists of  packet lengths
226      * [2, 3, 4, 5], the result will be that there is no match (because of the interleaving 4). If we are to allow
227      * interleaving packets, we need a modified version of
228      * <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
229      *
230      * @param subsequence The sequence to search for.
231      * @param sequence The sequence to search.
232      * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
233      *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
234      *                              {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
235      *                              internally compute the packet directions.
236      * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
237      *                           {@code sequenceDirections[i]} is the direction of the packet returned by
238      *                           {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
239      *                           compute the packet directions.
240      *
241      * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
242      *         {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
243      */
244     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
245                                                                  List<PcapPacket> sequence,
246                                                                  Conversation.Direction[] subsequenceDirections,
247                                                                  Conversation.Direction[] sequenceDirections) {
248         if (sequence.size() < subsequence.size()) {
249             // If subsequence is longer, it cannot be contained in sequence.
250             return Optional.empty();
251         }
252         if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
253             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
254             return Optional.empty();
255         }
256         // If packet directions have not been precomputed by calling code, we need to construct them.
257         if (subsequenceDirections == null) {
258             subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
259         }
260         if (sequenceDirections == null) {
261             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
262         }
263         int subseqIdx = 0;
264         int seqIdx = 0;
265         while (seqIdx < sequence.size()) {
266             PcapPacket subseqPkt = subsequence.get(subseqIdx);
267             PcapPacket seqPkt = sequence.get(seqIdx);
268             // We only have a match if packet lengths and directions match.
269             if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
270                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
271                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
272                 subseqIdx++;
273                 seqIdx++;
274                 if (subseqIdx == subsequence.size()) {
275                     // We managed to match the entire subsequence in sequence.
276                     // Return the sublist of sequence that matches subsequence.
277                     /*
278                      * TODO:
279                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
280                      * for live traces!
281                      */
282                     return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
283                 }
284             } else {
285                 // Mismatch.
286                 if (subseqIdx > 0) {
287                     /*
288                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
289                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
290                      * leave seqIdx untouched.
291                      */
292                     subseqIdx = 0;
293                 } else {
294                     /*
295                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
296                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
297                      * sequence.
298                      */
299                     seqIdx++;
300                 }
301             }
302         }
303         return Optional.empty();
304     }
305
306     /**
307      * Overloading the method {@code findSubsequenceInSequence} for range-based matching. Instead of a sequence,
308      * we have sequences of lower and upper bounds.
309      *
310      * @param lowerBound The lower bound of the sequence we search for.
311      * @param upperBound The upper bound of the sequence we search for.
312      * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
313      *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
314      *                              {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
315      *                              internally compute the packet directions.
316      * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
317      *                           {@code sequenceDirections[i]} is the direction of the packet returned by
318      *                           {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
319      *                           compute the packet directions.
320      *
321      * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
322      *         {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
323      */
324     private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
325                                                                  List<PcapPacket> upperBound,
326                                                                  List<PcapPacket> sequence,
327                                                                  Conversation.Direction[] subsequenceDirections,
328                                                                  Conversation.Direction[] sequenceDirections) {
329         // Just do the checks for either lower or upper bound!
330         // TODO: For now we use just the lower bound
331         if (sequence.size() < lowerBound.size()) {
332             // If subsequence is longer, it cannot be contained in sequence.
333             return Optional.empty();
334         }
335         if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
336             // We consider it a mismatch if one is a TLS application data sequence and the other is not.
337             return Optional.empty();
338         }
339         // If packet directions have not been precomputed by calling code, we need to construct them.
340         if (subsequenceDirections == null) {
341             subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
342         }
343         if (sequenceDirections == null) {
344             sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
345         }
346         int subseqIdx = 0;
347         int seqIdx = 0;
348         while (seqIdx < sequence.size()) {
349             PcapPacket lowBndPkt = lowerBound.get(subseqIdx);
350             PcapPacket upBndPkt = upperBound.get(subseqIdx);
351             PcapPacket seqPkt = sequence.get(seqIdx);
352             // We only have a match if packet lengths and directions match.
353             // The packet lengths have to be in the range of [lowerBound - eps, upperBound+eps]
354             // We initialize the lower and upper bounds first
355             int epsLowerBound = lowBndPkt.length();
356             int epsUpperBound = upBndPkt.length();
357             // Do strict matching if the lower and upper bounds are the same length
358             // Do range matching with eps otherwise
359             if (epsLowerBound != epsUpperBound) {
360                 // TODO: Maybe we could do better here for the double to integer conversion?
361                 epsLowerBound = epsLowerBound - (int) mEps;
362                 epsUpperBound = epsUpperBound + (int) mEps;
363             }
364             if (epsLowerBound <= seqPkt.getOriginalLength() &&
365                     seqPkt.getOriginalLength() <= epsUpperBound &&
366                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
367                 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
368                 subseqIdx++;
369                 seqIdx++;
370                 if (subseqIdx == lowerBound.size()) {
371                     // We managed to match the entire subsequence in sequence.
372                     // Return the sublist of sequence that matches subsequence.
373                     /*
374                      * TODO:
375                      * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
376                      * for live traces!
377                      */
378                     return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
379                 }
380             } else {
381                 // Mismatch.
382                 if (subseqIdx > 0) {
383                     /*
384                      * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
385                      * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
386                      * leave seqIdx untouched.
387                      */
388                     subseqIdx = 0;
389                 } else {
390                     /*
391                      * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
392                      * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
393                      * sequence.
394                      */
395                     seqIdx++;
396                 }
397             }
398         }
399         return Optional.empty();
400     }
401
402     // TODO: EXPERIMENT WITH ONLY PACKET DIRECTION AND TIMING
403 //    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
404 //                                                                 List<PcapPacket> sequence,
405 //                                                                 Conversation.Direction[] subsequenceDirections,
406 //                                                                 Conversation.Direction[] sequenceDirections) {
407 //        if (sequence.size() < subsequence.size()) {
408 //            // If subsequence is longer, it cannot be contained in sequence.
409 //            return Optional.empty();
410 //        }
411 //        if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
412 //            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
413 //            return Optional.empty();
414 //        }
415 //        // If packet directions have not been precomputed by calling code, we need to construct them.
416 //        if (subsequenceDirections == null) {
417 //            subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
418 //        }
419 //        if (sequenceDirections == null) {
420 //            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
421 //        }
422 //        int subseqIdx = 0;
423 //        int seqIdx = 0;
424 //        while (subseqIdx < subsequence.size() && seqIdx < sequence.size()) {
425 //            // We only have a match if packet lengths and directions match.
426 //            if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
427 //                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
428 //                subseqIdx++;
429 //                seqIdx++;
430 //                if (subseqIdx == subsequence.size()) {
431 //                    // We managed to match the entire subsequence in sequence.
432 //                    // Return the sublist of sequence that matches subsequence.
433 //                    /*
434 //                     * TODO:
435 //                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
436 //                     * for live traces!
437 //                     */
438 //                    // TODO: ALSO CHECK TIMING CONSTRAINT
439 //                    PcapPacket firstPacket = sequence.get(seqIdx - subsequence.size());
440 //                    PcapPacket lastPacket = sequence.get(seqIdx-1);
441 //                    if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
442 //                        return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
443 //                    }
444 //                }
445 //            } else {
446 //                // Mismatch.
447 //                if (subseqIdx > 0) {
448 //                    /*
449 //                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
450 //                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
451 //                     * leave seqIdx untouched.
452 //                     */
453 //                    subseqIdx = 0;
454 //                } else {
455 //                    /*
456 //                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
457 //                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
458 //                     * sequence.
459 //                     */
460 //                    seqIdx++;
461 //                }
462 //            }
463 //        }
464 //        return Optional.empty();
465 //    }
466 //
467 //    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
468 //                                                                 List<PcapPacket> upperBound,
469 //                                                                 List<PcapPacket> sequence,
470 //                                                                 Conversation.Direction[] subsequenceDirections,
471 //                                                                 Conversation.Direction[] sequenceDirections) {
472 //        // Just do the checks for either lower or upper bound!
473 //        // TODO: For now we use just the lower bound
474 //        if (sequence.size() < lowerBound.size()) {
475 //            // If subsequence is longer, it cannot be contained in sequence.
476 //            return Optional.empty();
477 //        }
478 //        if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
479 //            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
480 //            return Optional.empty();
481 //        }
482 //        // If packet directions have not been precomputed by calling code, we need to construct them.
483 //        if (subsequenceDirections == null) {
484 //            subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
485 //        }
486 //        if (sequenceDirections == null) {
487 //            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
488 //        }
489 //        int subseqIdx = 0;
490 //        int seqIdx = 0;
491 //        while (subseqIdx < lowerBound.size() && seqIdx < sequence.size()) {
492 //            // TODO: ONLY MATCH PACKET DIRECTIONS
493 //            if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
494 //                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
495 //                subseqIdx++;
496 //                seqIdx++;
497 //                if (subseqIdx == lowerBound.size()) {
498 //                    // We managed to match the entire subsequence in sequence.
499 //                    // Return the sublist of sequence that matches subsequence.
500 //                    /*
501 //                     * TODO:
502 //                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
503 //                     * for live traces!
504 //                     */
505 //                    // TODO: ALSO CHECK TIMING CONSTRAINT
506 //                    PcapPacket firstPacket = sequence.get(seqIdx - lowerBound.size());
507 //                    PcapPacket lastPacket = sequence.get(seqIdx);
508 //                    if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
509 //                        return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
510 //                    }
511 //                }
512 //            } else {
513 //                // Mismatch.
514 //                if (subseqIdx > 0) {
515 //                    /*
516 //                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
517 //                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
518 //                     * leave seqIdx untouched.
519 //                     */
520 //                    subseqIdx = 0;
521 //                } else {
522 //                    /*
523 //                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
524 //                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
525 //                     * sequence.
526 //                     */
527 //                    seqIdx++;
528 //                }
529 //            }
530 //        }
531 //        return Optional.empty();
532 //    }
533
534     /**
535      * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
536      * members. Two cluster members are considered identical if their packets lengths and packet directions are
537      * identical. The resulting pruned cluster is unmodifiable (this applies to both the outermost list as well as the
538      * nested lists) in order to preserve its integrity when exposed to external code (e.g., through
539      * {@link #getCluster()}).
540      *
541      * @param cluster A cluster to prune.
542      * @return The resulting pruned cluster.
543      */
544     @Override
545     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
546         List<List<PcapPacket>> prunedCluster = new ArrayList<>();
547         for (List<PcapPacket> originalClusterSeq : cluster) {
548             boolean alreadyPresent = false;
549             for (List<PcapPacket> prunedClusterSeq : prunedCluster) {
550                 Optional<List<PcapPacket>> duplicate = findSubsequenceInSequence(originalClusterSeq, prunedClusterSeq,
551                         mClusterMemberDirections, mClusterMemberDirections);
552                 if (duplicate.isPresent()) {
553                     alreadyPresent = true;
554                     break;
555                 }
556             }
557             if (!alreadyPresent) {
558                 prunedCluster.add(Collections.unmodifiableList(originalClusterSeq));
559             }
560         }
561         return Collections.unmodifiableList(prunedCluster);
562     }
563
564     /**
565      * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
566      * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
567      * index in the input list.
568      * @param packets The list of packets for which to construct a corresponding array of packet directions.
569      * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
570      *                    the traffic is captured just outside the local network (at the ISP side of the router). Set to
571      *                    {@code null} if {@code packets} stem from traffic captured within the local network.
572      * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
573      *         corresponding index in {@code packets}.
574      */
575     private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
576         Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
577         for (int i = 0; i < packets.size(); i++) {
578             PcapPacket pkt = packets.get(i);
579             if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
580                 // Sanity check: we shouldn't be processing loopback traffic
581                 throw new AssertionError("loopback traffic detected");
582             }
583             if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
584                 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
585             } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
586                 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
587             } else {
588                 //throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");
589             }
590         }
591         return directions;
592     }
593
594 }