Changing the mechanism to count and correlate skipped packets.
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / detection / layer3 / Layer3ClusterMatcher.java
index b9584ff365f325885df7cb055521b0d9b30d4966..165cdb3e57f68de4319504a3fee4e15cd4efc34e 100644 (file)
@@ -1,5 +1,6 @@
 package edu.uci.iotproject.detection.layer3;
 
 package edu.uci.iotproject.detection.layer3;
 
+import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
 import edu.uci.iotproject.detection.AbstractClusterMatcher;
 import edu.uci.iotproject.detection.ClusterMatcherObserver;
 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
 import edu.uci.iotproject.detection.AbstractClusterMatcher;
 import edu.uci.iotproject.detection.ClusterMatcherObserver;
 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
@@ -24,33 +25,6 @@ import static edu.uci.iotproject.util.PcapPacketUtils.*;
  */
 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
 
  */
 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
 
-    // Test client
-    public static void main(String[] args) throws PcapNativeException, NotOpenException {
-
-//        String path = "/scratch/July-2018"; // Rahmadi
-        String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
-        final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
-        final String signatureFile = path + "/2018-07/dlink/offSignature1.sig";
-
-        List<List<PcapPacket>> signature = PrintUtils.deserializeClustersFromFile(signatureFile);
-        Layer3ClusterMatcher clusterMatcher = new Layer3ClusterMatcher(signature, null,
-                (sig, match) -> System.out.println(
-                        String.format("[ !!! SIGNATURE DETECTED AT %s !!! ]",
-                                match.get(0).getTimestamp().atZone(ZoneId.of("America/Los_Angeles")))
-                )
-        );
-
-        PcapHandle handle;
-        try {
-            handle = Pcaps.openOffline(inputPcapFile, PcapHandle.TimestampPrecision.NANO);
-        } catch (PcapNativeException pne) {
-            handle = Pcaps.openOffline(inputPcapFile);
-        }
-        PcapHandleReader reader = new PcapHandleReader(handle, p -> true, clusterMatcher);
-        reader.readFromHandle();
-        clusterMatcher.performDetection();
-    }
-
     /**
      * The ordered directions of packets in the sequences that make up {@link #mCluster}.
      */
     /**
      * The ordered directions of packets in the sequences that make up {@link #mCluster}.
      */
@@ -66,19 +40,33 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
      */
     private final String mRouterWanIp;
 
      */
     private final String mRouterWanIp;
 
+    /**
+     * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here: when the lower and
+     * upper bound lengths of a signature packet differ, the accepted packet length range is widened by this value
+     * (truncated to an integer) at both ends.
+     */
+    private final double mEps;
+
+    /**
+     * The packet inclusion time for signature, in milliseconds. The constructor substitutes
+     * {@link TriggerTrafficExtractor#INCLUSION_WINDOW_MILLIS} when it is handed a value of 0.
+     */
+    private int mInclusionTimeMillis;
+
     /**
      * Create a {@link Layer3ClusterMatcher}.
      * @param cluster The cluster that traffic is matched against.
      * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
      *                    determining the direction of packets).
     /**
      * Create a {@link Layer3ClusterMatcher}.
      * @param cluster The cluster that traffic is matched against.
      * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
      *                    determining the direction of packets).
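+     * @param inclusionTimeMillis The packet inclusion time for signature, in milliseconds; pass 0 to use
+     *                            {@link TriggerTrafficExtractor#INCLUSION_WINDOW_MILLIS}.
+     * @param isRangeBased {@code true} for range-based matching, {@code false} for strict matching. The check that
+     *                     all cluster members exhibit the same packet direction pattern is only performed for
+     *                     strict matching.
+     * @param eps The epsilon value used in the DBSCAN algorithm; it is also used to widen the accepted packet
+     *            length range during range-based matching.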
      * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
      *                          (a subset of) the examined traffic is similar to the traffic that makes up
      *                          {@code cluster}, i.e., when the examined traffic is classified as pertaining to
      *                          {@code cluster}.
      */
      * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
      *                          (a subset of) the examined traffic is similar to the traffic that makes up
      *                          {@code cluster}, i.e., when the examined traffic is classified as pertaining to
      *                          {@code cluster}.
      */
-    public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp,
+    public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, int inclusionTimeMillis,
+                                boolean isRangeBased, double eps,
                                 ClusterMatcherObserver... detectionObservers) {
                                 ClusterMatcherObserver... detectionObservers) {
-        super(cluster);
+        super(cluster, isRangeBased);
         Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
         for (ClusterMatcherObserver obs : detectionObservers) {
             addObserver(obs);
         Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
         for (ClusterMatcherObserver obs : detectionObservers) {
             addObserver(obs);
@@ -92,14 +80,19 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
          * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
          * in order to ensure correctness, especially during the development/debugging phase.
          */
          * on in favor of performance. However, it is only run once (at instantiation), so the overhead may be warranted
          * in order to ensure correctness, especially during the development/debugging phase.
          */
-        if (mCluster.stream().
-                anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
-            throw new IllegalArgumentException(
-                    "cluster members must contain the same number of packets and exhibit the same packet direction " +
-                            "pattern"
-            );
+        if (!isRangeBased) {    // Only when it is not range-based
+            if (mCluster.stream().
+                    anyMatch(inner -> !Arrays.equals(mClusterMemberDirections, getPacketDirections(inner, null)))) {
+                throw new IllegalArgumentException(
+                        "cluster members must contain the same number of packets and exhibit the same packet direction " +
+                                "pattern"
+                );
+            }
         }
         }
+        mEps = eps;
         mRouterWanIp = routerWanIp;
         mRouterWanIp = routerWanIp;
+        mInclusionTimeMillis =
+                inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
     }
 
     @Override
     }
 
     @Override
@@ -116,7 +109,43 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
         return mCluster;
     }
 
         return mCluster;
     }
 
-    public void performDetection() {
+    /**
+     * Performs range-based detection: searches each reassembled TCP conversation for packet sequences that fall
+     * within the range spanned by the first (lower bound) and second (upper bound) member of {@link #mCluster}, and
+     * notifies every registered observer about each match found.
+     * <p>
+     * This version only works for signatures that do not span across multiple TCP conversations.
+     *
+     * @throws IllegalStateException if the cluster does not contain both a lower bound and an upper bound sequence.
+     */
+    public void performDetectionRangeBased() {
+        if (mCluster.size() < 2) {
+            // Fail with a descriptive message instead of a bare IndexOutOfBoundsException from mCluster.get(1).
+            throw new IllegalStateException(
+                    "range-based detection requires a cluster with a lower bound and an upper bound sequence");
+        }
+        // The bounds are the same for every conversation, so look them up once.
+        final List<PcapPacket> lowerBound = mCluster.get(0);
+        final List<PcapPacket> upperBound = mCluster.get(1);
+        for (Conversation c : mTcpReassembler.getTcpConversations()) {
+            // Fetch set of packets to examine based on TLS or not.
+            List<PcapPacket> cPkts = c.isTls() ? c.getTlsApplicationDataPackets() : c.getPackets();
+            if (cPkts.isEmpty()) {
+                // Skip empty conversations.
+                continue;
+            }
+            if (isTlsSequence(lowerBound) != c.isTls() || isTlsSequence(upperBound) != c.isTls()) {
+                // We consider it a mismatch if one is a TLS application data sequence and the other is not.
+                continue;
+            }
+            Optional<List<PcapPacket>> match;
+            while ((match = findSubsequenceInSequence(lowerBound, upperBound, cPkts, mClusterMemberDirections, null)).
+                    isPresent()) {
+                List<PcapPacket> matchSeq = match.get();
+                // Notify observers about the match.
+                // Max number of skipped packets in layer 3 is 0 (no skipped packets)
+                mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
+                /*
+                 * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
+                 * signature sequence.
+                 */
+                int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
+                /*
+                 * We restart the search for the signature sequence immediately after that index, so truncate cPkts.
+                 * The remainder is copied into a new list; matchSeq (a view of the previous list) stays valid.
+                 */
+                cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
+            }
+        }
+    }
+
+    public void performDetectionConservative() {
         /*
          * Let's start out simple by building a version that only works for signatures that do not span across multiple
          * TCP conversations...
         /*
          * Let's start out simple by building a version that only works for signatures that do not span across multiple
          * TCP conversations...
@@ -146,16 +175,18 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
                         isPresent()) {
                     List<PcapPacket> matchSeq = match.get();
                     // Notify observers about the match.
                         isPresent()) {
                     List<PcapPacket> matchSeq = match.get();
                     // Notify observers about the match.
+                    // Max number of skipped packets in layer 3 is 0 (no skipped packets)
                     mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
                     /*
                      * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
                      * signature sequence.
                      */
                     mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
                     /*
                      * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
                      * signature sequence.
                      */
-                    int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size()-1));
+                    int matchSeqEndIdx = cPkts.indexOf(matchSeq.get(matchSeq.size() - 1));
                     // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
                     cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
                 }
             }
                     // We restart the search for the signature sequence immediately after that index, so truncate cPkts.
                     cPkts = cPkts.stream().skip(matchSeqEndIdx + 1).collect(Collectors.toList());
                 }
             }
+
             /*
              * TODO:
              * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
             /*
              * TODO:
              * if no item in cluster matches, also perform a distance-based matching to cover those cases where we did
@@ -271,6 +302,102 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
         return Optional.empty();
     }
 
         return Optional.empty();
     }
 
+    /**
+     * Overloading the method {@code findSubsequenceInSequence} for range-based matching. Instead of a single
+     * subsequence, the searched pattern is given as a pair of sequences holding the lower and upper bounds of the
+     * packet lengths.
+     * <p>
+     * A window of {@code sequence} matches if, for every position {@code i} of the pattern, the packet direction
+     * equals {@code subsequenceDirections[i]} and the packet's original length lies within the range defined by
+     * {@code lowerBound.get(i)} and {@code upperBound.get(i)}. If the two bound lengths are equal, the length must
+     * match exactly; otherwise the range is widened by the epsilon value at both ends.
+     * <p>
+     * Every possible start index is examined in order, so an occurrence that overlaps an earlier partial match is
+     * still found. The cost is proportional to {@code sequence.size() * lowerBound.size()} in the worst case.
+     *
+     * @param lowerBound The lower bound of the sequence we search for. Its size defines the pattern length.
+     * @param upperBound The upper bound of the sequence we search for. Must have at least as many packets as
+     *                   {@code lowerBound}.
+     * @param sequence The sequence of packets to search in.
+     * @param subsequenceDirections The directions of packets in the pattern such that for all {@code i},
+     *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
+     *                              {@code lowerBound.get(i)}. May be set to {@code null}, in which case this call
+     *                              will internally compute the packet directions from {@code lowerBound}.
+     * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
+     *                           {@code sequenceDirections[i]} is the direction of the packet returned by
+     *                           {@code sequence.get(i)}. May be set to {@code null}, in which case this call will
+     *                           internally compute the packet directions.
+     *
+     * @return An {@link Optional} containing the first part of {@code sequence} that matches the bounds (as a view
+     *         of {@code sequence}), or an empty {@link Optional} if no part of {@code sequence} matches.
+     * @throws IllegalArgumentException if {@code upperBound} has fewer packets than {@code lowerBound}.
+     */
+    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
+                                                                 List<PcapPacket> upperBound,
+                                                                 List<PcapPacket> sequence,
+                                                                 Conversation.Direction[] subsequenceDirections,
+                                                                 Conversation.Direction[] sequenceDirections) {
+        if (upperBound.size() < lowerBound.size()) {
+            // Otherwise upperBound.get(i) below would fail with a bare IndexOutOfBoundsException mid-search.
+            throw new IllegalArgumentException("upperBound must have at least as many packets as lowerBound");
+        }
+        if (lowerBound.isEmpty() || sequence.size() < lowerBound.size()) {
+            // An empty pattern has nothing to match; a pattern longer than sequence cannot be contained in it.
+            return Optional.empty();
+        }
+        if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
+            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
+            return Optional.empty();
+        }
+        // If packet directions have not been precomputed by calling code, we need to construct them.
+        if (subsequenceDirections == null) {
+            subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
+        }
+        if (sequenceDirections == null) {
+            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
+        }
+        /*
+         * Packet lengths are integers, so for a non-negative eps truncating it to an int accepts exactly the same
+         * lengths as comparing against the fractional value would.
+         */
+        final int eps = (int) mEps;
+        final int patternLength = lowerBound.size();
+        final int lastStart = sequence.size() - patternLength;
+        for (int start = 0; start <= lastStart; start++) {
+            boolean windowMatches = true;
+            for (int i = 0; i < patternLength && windowMatches; i++) {
+                // NOTE(review): the bounds use length() while the examined packet uses getOriginalLength();
+                // confirm that both are intended to denote the same length for signature packets.
+                int minLength = lowerBound.get(i).length();
+                int maxLength = upperBound.get(i).length();
+                // Do strict matching if the lower and upper bounds are the same length.
+                // Do range matching with eps otherwise, i.e., accept [lowerBound - eps, upperBound + eps].
+                if (minLength != maxLength) {
+                    minLength -= eps;
+                    maxLength += eps;
+                }
+                int actualLength = sequence.get(start + i).getOriginalLength();
+                // We only have a match if packet lengths and directions match.
+                windowMatches = minLength <= actualLength && actualLength <= maxLength &&
+                        subsequenceDirections[i] == sequenceDirections[start + i];
+            }
+            if (windowMatches) {
+                /*
+                 * TODO:
+                 * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
+                 * for live traces!
+                 */
+                return Optional.of(sequence.subList(start, start + patternLength));
+            }
+            // Mismatch somewhere in this window; retry with the window shifted forward by one packet.
+        }
+        return Optional.empty();
+    }
+
     /**
      * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
      * members. Two cluster members are considered identical if their packets lengths and packet directions are
     /**
      * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
      * members. Two cluster members are considered identical if their packets lengths and packet directions are