1 package edu.uci.iotproject.detection.layer3;
3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
4 import edu.uci.iotproject.detection.AbstractClusterMatcher;
5 import edu.uci.iotproject.detection.ClusterMatcherObserver;
6 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
7 import edu.uci.iotproject.trafficreassembly.layer3.TcpReassembler;
8 import edu.uci.iotproject.analysis.TcpConversationUtils;
9 import edu.uci.iotproject.io.PcapHandleReader;
10 import edu.uci.iotproject.util.PrintUtils;
11 import org.pcap4j.core.*;
13 import java.time.ZoneId;
15 import java.util.stream.Collectors;
17 import static edu.uci.iotproject.util.PcapPacketUtils.*;
20 * Searches a traffic trace for sequences of packets "belong to" a given cluster (in other words, attempts to classify
21 * traffic as pertaining to a given cluster).
23 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
24 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
26 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
29 * The ordered directions of packets in the sequences that make up {@link #mCluster}.
31 private final Conversation.Direction[] mClusterMemberDirections;
34 * For reassembling the observed traffic into TCP connections.
36 private final TcpReassembler mTcpReassembler;
39 * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
41 private final String mRouterWanIp;
44 * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
46 private final double mEps;
49 * The packet inclusion time for signature.
51 private int mInclusionTimeMillis;
57 private Set<Integer> mPacketSet;
60 * Create a {@link Layer3ClusterMatcher}.
61 * @param cluster The cluster that traffic is matched against.
62 * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
63 * determining the direction of packets).
64 * @param inclusionTimeMillis The packet inclusion time for signature.
65 * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
66 * @param eps The epsilon value used in the DBSCAN algorithm.
67 * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
68 * (a subset of) the examined traffic is similar to the traffic that makes up
69 * {@code cluster}, i.e., when the examined traffic is classified as pertaining to
72 public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, int inclusionTimeMillis,
73 boolean isRangeBased, double eps, int delta, Set<Integer> packetSet,
74 ClusterMatcherObserver... detectionObservers) {
75 super(cluster, isRangeBased);
76 Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
77 for (ClusterMatcherObserver obs : detectionObservers) {
80 // Build the cluster members' direction sequence.
81 // Note: assumes that the provided cluster was captured within the local network (routerWanIp is set to null).
82 mClusterMemberDirections = getPacketDirections(cluster.get(0), null);
84 * Enforce restriction on cluster members: all representatives must exhibit the same direction pattern and
85 * contain the same number of packets. Note that this is a somewhat heavy operation, so it may be disabled later
86 * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
87 * in order to ensure correctness, especially during the development/debugging phase.
89 if (!isRangeBased) { // Only when it is not range-based
90 if (mCluster.stream().
91 anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
92 throw new IllegalArgumentException(
93 "cluster members must contain the same number of packets and exhibit the same packet direction " +
99 mRouterWanIp = routerWanIp;
100 mTcpReassembler = new TcpReassembler(mRouterWanIp);
101 mInclusionTimeMillis =
102 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
104 mPacketSet = packetSet;
108 public void gotPacket(PcapPacket packet) {
109 // Present packet to TCP reassembler so that it can be mapped to a connection (if it is a TCP packet).
110 mTcpReassembler.gotPacket(packet);
114 * Get the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
115 * @return the cluster that describes the packet sequence that this {@link Layer3ClusterMatcher} is searching for.
117 public List<List<PcapPacket>> getCluster() {
121 public void performDetectionRangeBased() {
123 * Let's start out simple by building a version that only works for signatures that do not span across multiple
124 * TCP conversations...
126 for (Conversation c : mTcpReassembler.getTcpConversations()) {
127 if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
128 // Skip empty conversations.
131 List<PcapPacket> lowerBound = mCluster.get(0);
132 List<PcapPacket> upperBound = mCluster.get(1);
133 if (isTlsSequence(lowerBound) != c.isTls() || isTlsSequence(upperBound) != c.isTls()) {
134 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
137 // Fetch set of packets to examine based on TLS or not.
138 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
139 Optional<List<PcapPacket>> match;
140 while ((match = findSubsequenceInSequence(lowerBound, upperBound, cPkts, mClusterMemberDirections, null)).
142 List<PcapPacket> matchSeq = match.get();
143 // Notify observers about the match.
144 // Max number of skipped packets in layer 3 is 0 (no skipped packets)
145 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
147 * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
148 * signature sequence.
150 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
151 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
152 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
157 // TODO: Relaxed matching with delta is only applied to conservative matching for now
158 public void performDetectionConservative() {
160 * Let's start out simple by building a version that only works for signatures that do not span across multiple
161 * TCP conversations...
163 for (Conversation c : mTcpReassembler.getTcpConversations()) {
164 if (c.isTls() && c.getTlsApplicationDataPackets().isEmpty() || !c.isTls() && c.getPackets().isEmpty()) {
165 // Skip empty conversations.
168 for (List<PcapPacket> signatureSequence : mCluster) {
169 if (isTlsSequence(signatureSequence) != c.isTls()) {
170 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
173 // Fetch set of packets to examine based on TLS or not.
174 List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
176 * Note: we embed the attempt to detect the signature sequence in a loop in order to capture those cases
177 * where the same signature sequence appears multiple times in one Conversation.
179 * Note: since we expect all sequences that together make up the signature to exhibit the same direction
180 * pattern, we can simply pass the precomputed direction array for the signature sequence so that it
181 * won't have to be recomputed internally in each call to findSubsequenceInSequence().
183 Optional<List<PcapPacket>> match;
184 while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null, mDelta, mPacketSet)).
186 List<PcapPacket> matchSeq = match.get();
187 // Notify observers about the match.
188 // Max number of skipped packets in layer 3 is 0 (no skipped packets)
189 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
191 * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
192 * signature sequence.
194 int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
195 // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
196 cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
202 * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
203 * not manage to capture every single mutation of the sequence during training.
205 * Need to compute average/centroid of cluster to do so...? Compute within-cluster variance, then check if
206 * distance between input conversation and cluster average/centroid is smaller than or equal to the computed
213 * Checks if {@code sequence} is a sequence of TLS packets. Note: the current implementation relies on inspection
214 * of the port numbers when deciding between TLS vs. non-TLS. Therefore, only the first packet of {@code sequence}
215 * is examined as it is assumed that all packets in {@code sequence} pertain to the same {@link Conversation} and
216 * hence share the same set of two src/dst port numbers (albeit possibly alternating between which one is the src
217 * and which one is the dst, as packets in {@code sequence} may be in alternating directions).
218 * @param sequence The sequence of packets for which it is to be determined if it is a sequence of TLS packets or
220 * @return {@code true} if {@code sequence} is a sequence of TLS packets, {@code false} otherwise.
222 private boolean isTlsSequence(List<PcapPacket> sequence) {
223 // NOTE: Assumes ALL packets in sequence pertain to the same TCP connection!
224 PcapPacket firstPkt = sequence.get(0);
225 int srcPort = getSourcePort(firstPkt);
226 int dstPort = getDestinationPort(firstPkt);
227 return TcpConversationUtils.isTlsPort(srcPort) || TcpConversationUtils.isTlsPort(dstPort);
231 * Examine if a given sequence of packets ({@code sequence}) contains a given shorter sequence of packets
232 * ({@code subsequence}). Note: the current implementation actually searches for a substring as it does not allow
233 * for interleaving packets in {@code sequence} that are not in {@code subsequence}; for example, if
234 * {@code subsequence} consists of packet lengths [2, 3, 5] and {@code sequence} consists of packet lengths
235 * [2, 3, 4, 5], the result will be that there is no match (because of the interleaving 4). If we are to allow
236 * interleaving packets, we need a modified version of
237 * <a href="https://stackoverflow.com/a/20545604/1214974">this</a>.
239 * @param subsequence The sequence to search for.
240 * @param sequence The sequence to search.
241 * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
242 * {@code subsequenceDirections[i]} is the direction of the packet returned by
243 * {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
244 * internally compute the packet directions.
245 * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
246 * {@code sequenceDirections[i]} is the direction of the packet returned by
247 * {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
248 * compute the packet directions.
250 * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
251 * {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
253 private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
254 List<PcapPacket> sequence,
255 Conversation.Direction[] subsequenceDirections,
256 Conversation.Direction[] sequenceDirections) {
257 if (sequence.size() < subsequence.size()) {
258 // If subsequence is longer, it cannot be contained in sequence.
259 return Optional.empty();
261 if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
262 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
263 return Optional.empty();
265 // If packet directions have not been precomputed by calling code, we need to construct them.
266 if (subsequenceDirections == null) {
267 subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
269 if (sequenceDirections == null) {
270 sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
274 while (seqIdx < sequence.size()) {
275 PcapPacket subseqPkt = subsequence.get(subseqIdx);
276 PcapPacket seqPkt = sequence.get(seqIdx);
277 // We only have a match if packet lengths and directions match.
278 if (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
279 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
280 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
283 if (subseqIdx == subsequence.size()) {
284 // We managed to match the entire subsequence in sequence.
285 // Return the sublist of sequence that matches subsequence.
288 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
291 return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
297 * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
298 * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
299 * leave seqIdx untouched.
304 * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
305 * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
312 return Optional.empty();
316 * Overload the same method with relaxed matching.
318 * @param subsequence The sequence to search for.
319 * @param sequence The sequence to search.
320 * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
321 * {@code subsequenceDirections[i]} is the direction of the packet returned by
322 * {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
323 * internally compute the packet directions.
324 * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
325 * {@code sequenceDirections[i]} is the direction of the packet returned by
326 * {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
327 * compute the packet directions.
328 * @param delta The delta for relaxed matching
329 * @param packetSet The set of unique packet lengths, whose matching is to be relaxed
331 * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
332 * {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
334 private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
335 List<PcapPacket> sequence,
336 Conversation.Direction[] subsequenceDirections,
337 Conversation.Direction[] sequenceDirections,
339 Set<Integer> packetSet) {
340 if (sequence.size() < subsequence.size()) {
341 // If subsequence is longer, it cannot be contained in sequence.
342 return Optional.empty();
344 if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
345 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
346 return Optional.empty();
348 // If packet directions have not been precomputed by calling code, we need to construct them.
349 if (subsequenceDirections == null) {
350 subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
352 if (sequenceDirections == null) {
353 sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
357 while (seqIdx < sequence.size()) {
358 PcapPacket subseqPkt = subsequence.get(subseqIdx);
359 PcapPacket seqPkt = sequence.get(seqIdx);
360 // We only have a match if packet lengths and directions match.
361 // Do relaxed matching here if applicable
362 if ((delta > 0 && packetSet.contains(subseqPkt.getOriginalLength()) &&
363 subseqPkt.getOriginalLength() - delta <= seqPkt.getOriginalLength() &&
364 seqPkt.getOriginalLength() <= subseqPkt.getOriginalLength() + delta &&
365 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) ||
366 // Or just exact matching
367 (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
368 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx])) {
369 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
372 if (subseqIdx == subsequence.size()) {
373 // We managed to match the entire subsequence in sequence.
374 // Return the sublist of sequence that matches subsequence.
377 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
380 return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
386 * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
387 * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
388 * leave seqIdx untouched.
393 * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
394 * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
401 return Optional.empty();
405 * Overloading the method {@code findSubsequenceInSequence} for range-based matching. Instead of a sequence,
406 * we have sequences of lower and upper bounds.
408 * @param lowerBound The lower bound of the sequence we search for.
409 * @param upperBound The upper bound of the sequence we search for.
410 * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
411 * {@code subsequenceDirections[i]} is the direction of the packet returned by
412 * {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
413 * internally compute the packet directions.
414 * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
415 * {@code sequenceDirections[i]} is the direction of the packet returned by
416 * {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
417 * compute the packet directions.
419 * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
420 * {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
422 private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
423 List<PcapPacket> upperBound,
424 List<PcapPacket> sequence,
425 Conversation.Direction[] subsequenceDirections,
426 Conversation.Direction[] sequenceDirections) {
427 // Just do the checks for either lower or upper bound!
428 // TODO: For now we use just the lower bound
429 if (sequence.size() < lowerBound.size()) {
430 // If subsequence is longer, it cannot be contained in sequence.
431 return Optional.empty();
433 if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
434 // We consider it a mismatch if one is a TLS application data sequence and the other is not.
435 return Optional.empty();
437 // If packet directions have not been precomputed by calling code, we need to construct them.
438 if (subsequenceDirections == null) {
439 subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
441 if (sequenceDirections == null) {
442 sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
446 while (seqIdx < sequence.size()) {
447 PcapPacket lowBndPkt = lowerBound.get(subseqIdx);
448 PcapPacket upBndPkt = upperBound.get(subseqIdx);
449 PcapPacket seqPkt = sequence.get(seqIdx);
450 // We only have a match if packet lengths and directions match.
451 // The packet lengths have to be in the range of [lowerBound - eps, upperBound+eps]
452 // We initialize the lower and upper bounds first
453 int epsLowerBound = lowBndPkt.length();
454 int epsUpperBound = upBndPkt.length();
455 // Do strict matching if the lower and upper bounds are the same length
456 // Do range matching with eps otherwise
457 if (epsLowerBound != epsUpperBound) {
458 // TODO: Maybe we could do better here for the double to integer conversion?
459 epsLowerBound = epsLowerBound - (int) mEps;
460 epsUpperBound = epsUpperBound + (int) mEps;
462 if (epsLowerBound <= seqPkt.getOriginalLength() &&
463 seqPkt.getOriginalLength() <= epsUpperBound &&
464 subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
465 // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
468 if (subseqIdx == lowerBound.size()) {
469 // We managed to match the entire subsequence in sequence.
470 // Return the sublist of sequence that matches subsequence.
473 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
476 return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
482 * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
483 * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
484 * leave seqIdx untouched.
489 * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
490 * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
497 return Optional.empty();
500 // TODO: EXPERIMENT WITH ONLY PACKET DIRECTION AND TIMING
501 // private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
502 // List<PcapPacket> sequence,
503 // Conversation.Direction[] subsequenceDirections,
504 // Conversation.Direction[] sequenceDirections) {
505 // if (sequence.size() < subsequence.size()) {
506 // // If subsequence is longer, it cannot be contained in sequence.
507 // return Optional.empty();
509 // if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
510 // // We consider it a mismatch if one is a TLS application data sequence and the other is not.
511 // return Optional.empty();
513 // // If packet directions have not been precomputed by calling code, we need to construct them.
514 // if (subsequenceDirections == null) {
515 // subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
517 // if (sequenceDirections == null) {
518 // sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
520 // int subseqIdx = 0;
522 // while (subseqIdx < subsequence.size() && seqIdx < sequence.size()) {
523 // // We only have a match if packet lengths and directions match.
524 // if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
525 // // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
528 // if (subseqIdx == subsequence.size()) {
529 // // We managed to match the entire subsequence in sequence.
530 // // Return the sublist of sequence that matches subsequence.
533 // * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
534 // * for live traces!
536 // // TODO: ALSO CHECK TIMING CONSTRAINT
537 // PcapPacket firstPacket = sequence.get(seqIdx - subsequence.size());
538 // PcapPacket lastPacket = sequence.get(seqIdx-1);
539 // if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
540 // return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
545 // if (subseqIdx > 0) {
547 // * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
548 // * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
549 // * leave seqIdx untouched.
554 // * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
555 // * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
562 // return Optional.empty();
565 // private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
566 // List<PcapPacket> upperBound,
567 // List<PcapPacket> sequence,
568 // Conversation.Direction[] subsequenceDirections,
569 // Conversation.Direction[] sequenceDirections) {
570 // // Just do the checks for either lower or upper bound!
571 // // TODO: For now we use just the lower bound
572 // if (sequence.size() < lowerBound.size()) {
573 // // If subsequence is longer, it cannot be contained in sequence.
574 // return Optional.empty();
576 // if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
577 // // We consider it a mismatch if one is a TLS application data sequence and the other is not.
578 // return Optional.empty();
580 // // If packet directions have not been precomputed by calling code, we need to construct them.
581 // if (subsequenceDirections == null) {
582 // subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
584 // if (sequenceDirections == null) {
585 // sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
587 // int subseqIdx = 0;
589 // while (subseqIdx < lowerBound.size() && seqIdx < sequence.size()) {
590 // // TODO: ONLY MATCH PACKET DIRECTIONS
591 // if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
592 // // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
595 // if (subseqIdx == lowerBound.size()) {
596 // // We managed to match the entire subsequence in sequence.
597 // // Return the sublist of sequence that matches subsequence.
600 // * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
601 // * for live traces!
603 // // TODO: ALSO CHECK TIMING CONSTRAINT
604 // PcapPacket firstPacket = sequence.get(seqIdx - lowerBound.size());
605 // PcapPacket lastPacket = sequence.get(seqIdx);
606 // if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
607 // return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
612 // if (subseqIdx > 0) {
614 // * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
615 // * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
616 // * leave seqIdx untouched.
621 // * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
622 // * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
629 // return Optional.empty();
633 * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
634 * members. Two cluster members are considered identical if their packets lengths and packet directions are
635 * identical. The resulting pruned cluster is unmodifiable (this applies to both the outermost list as well as the
636 * nested lists) in order to preserve its integrity when exposed to external code (e.g., through
637 * {@link #getCluster()}).
639 * @param cluster A cluster to prune.
640 * @return The resulting pruned cluster.
643 protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
644 List<List<PcapPacket>> prunedCluster = new ArrayList<>();
645 for (List<PcapPacket> originalClusterSeq : cluster) {
646 boolean alreadyPresent = false;
647 for (List<PcapPacket> prunedClusterSeq : prunedCluster) {
648 Optional<List<PcapPacket>> duplicate = findSubsequenceInSequence(originalClusterSeq, prunedClusterSeq,
649 mClusterMemberDirections, mClusterMemberDirections);
650 if (duplicate.isPresent()) {
651 alreadyPresent = true;
655 if (!alreadyPresent) {
656 prunedCluster.add(Collections.unmodifiableList(originalClusterSeq));
659 return Collections.unmodifiableList(prunedCluster);
663 * Given a {@code List<PcapPacket>}, generate a {@code Conversation.Direction[]} such that each entry in the
664 * resulting {@code Conversation.Direction[]} specifies the direction of the {@link PcapPacket} at the corresponding
665 * index in the input list.
666 * @param packets The list of packets for which to construct a corresponding array of packet directions.
667 * @param routerWanIp The IP of the router's WAN port. This is used for determining the direction of packets when
668 * the traffic is captured just outside the local network (at the ISP side of the router). Set to
669 * {@code null} if {@code packets} stem from traffic captured within the local network.
670 * @return A {@code Conversation.Direction[]} specifying the direction of the {@link PcapPacket} at the
671 * corresponding index in {@code packets}.
673 private static Conversation.Direction[] getPacketDirections(List<PcapPacket> packets, String routerWanIp) {
674 Conversation.Direction[] directions = new Conversation.Direction[packets.size()];
675 for (int i = 0; i < packets.size(); i++) {
676 PcapPacket pkt = packets.get(i);
677 if (getSourceIp(pkt).equals(getDestinationIp(pkt))) {
678 // Sanity check: we shouldn't be processing loopback traffic
679 throw new AssertionError("loopback traffic detected");
681 if (isSrcIpLocal(pkt) || getSourceIp(pkt).equals(routerWanIp)) {
682 directions[i] = Conversation.Direction.CLIENT_TO_SERVER;
683 } else if (isDstIpLocal(pkt) || getDestinationIp(pkt).equals(routerWanIp)) {
684 directions[i] = Conversation.Direction.SERVER_TO_CLIENT;
686 //throw new IllegalArgumentException("no local IP or router WAN port IP found, can't detect direction");