Implementing relaxed matching for layer 2 and layer 3.
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / detection / layer3 / Layer3ClusterMatcher.java
index b070bd24812112e861a069b6fe09cf4d2f00111b..d9a51fe9ebc9c324698267cc1fc1daab8dc3a94c 100644 (file)
@@ -1,5 +1,6 @@
 package edu.uci.iotproject.detection.layer3;
 
+import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
 import edu.uci.iotproject.detection.AbstractClusterMatcher;
 import edu.uci.iotproject.detection.ClusterMatcherObserver;
 import edu.uci.iotproject.trafficreassembly.layer3.Conversation;
@@ -24,33 +25,6 @@ import static edu.uci.iotproject.util.PcapPacketUtils.*;
  */
 public class Layer3ClusterMatcher extends AbstractClusterMatcher implements PacketListener {
 
-    // Test client
-    public static void main(String[] args) throws PcapNativeException, NotOpenException {
-
-//        String path = "/scratch/July-2018"; // Rahmadi
-//        String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
-//        final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
-//        final String signatureFile = path + "/2018-07/dlink/offSignature1.sig";
-//
-//        List<List<PcapPacket>> signature = PrintUtils.deserializeClustersFromFile(signatureFile);
-//        Layer3ClusterMatcher clusterMatcher = new Layer3ClusterMatcher(signature, null,
-//                (sig, match) -> System.out.println(
-//                        String.format("[ !!! SIGNATURE DETECTED AT %s !!! ]",
-//                                match.get(0).getTimestamp().atZone(ZoneId.of("America/Los_Angeles")))
-//                )
-//        );
-//
-//        PcapHandle handle;
-//        try {
-//            handle = Pcaps.openOffline(inputPcapFile, PcapHandle.TimestampPrecision.NANO);
-//        } catch (PcapNativeException pne) {
-//            handle = Pcaps.openOffline(inputPcapFile);
-//        }
-//        PcapHandleReader reader = new PcapHandleReader(handle, p -> true, clusterMatcher);
-//        reader.readFromHandle();
-//        clusterMatcher.performDetection();
-    }
-
     /**
      * The ordered directions of packets in the sequences that make up {@link #mCluster}.
      */
@@ -59,7 +33,7 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
     /**
      * For reassembling the observed traffic into TCP connections.
      */
-    private final TcpReassembler mTcpReassembler = new TcpReassembler();
+    private final TcpReassembler mTcpReassembler;
 
     /**
      * IP of the router's WAN port (if analyzed traffic is captured at the ISP's point of view).
@@ -71,19 +45,32 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
      */
     private final double mEps;
 
+    /**
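+     * The packet inclusion time for a signature, in milliseconds. A value of 0 passed to the constructor is
+     * replaced by {@link TriggerTrafficExtractor#INCLUSION_WINDOW_MILLIS}.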
+     */
+    private int mInclusionTimeMillis;
+
+    /**
+     * Relaxed matching: the packet length tolerance (delta) and the set of packet lengths it applies to.
+     */
+    private int mDelta;
+    private Set<Integer> mPacketSet;
+
     /**
      * Create a {@link Layer3ClusterMatcher}.
      * @param cluster The cluster that traffic is matched against.
      * @param routerWanIp The router's WAN IP if examining traffic captured at the ISP's point of view (used for
      *                    determining the direction of packets).
-     * @param eps The epsilon value used in the DBSCAN algorithm.
+     * @param inclusionTimeMillis The packet inclusion time for a signature, in milliseconds; {@code 0} selects
+     *                            {@link TriggerTrafficExtractor#INCLUSION_WINDOW_MILLIS}.
      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
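+     * @param eps The epsilon value used in the DBSCAN algorithm.
+     * @param delta The delta for relaxed matching.
+     * @param packetSet The set of unique packet lengths, whose matching is to be relaxed.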
      * @param detectionObservers Client code that wants to get notified whenever the {@link Layer3ClusterMatcher} detects that
      *                          (a subset of) the examined traffic is similar to the traffic that makes up
      *                          {@code cluster}, i.e., when the examined traffic is classified as pertaining to
      *                          {@code cluster}.
      */
-    public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, boolean isRangeBased, double eps,
+    public Layer3ClusterMatcher(List<List<PcapPacket>> cluster, String routerWanIp, int inclusionTimeMillis,
+                                boolean isRangeBased, double eps, int delta, Set<Integer> packetSet,
                                 ClusterMatcherObserver... detectionObservers) {
         super(cluster, isRangeBased);
         Objects.requireNonNull(detectionObservers, "detectionObservers cannot be null");
@@ -110,6 +97,11 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
         }
         mEps = eps;
         mRouterWanIp = routerWanIp;
+                               mTcpReassembler = new TcpReassembler(mRouterWanIp);
+        mInclusionTimeMillis =
+                inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
+        mDelta = delta;
+        mPacketSet = packetSet;
     }
 
     @Override
@@ -149,6 +141,7 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
                     isPresent()) {
                 List<PcapPacket> matchSeq = match.get();
                 // Notify observers about the match.
+                // Max number of skipped packets in layer 3 is 0 (no skipped packets)
                 mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
                 /*
                  * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
@@ -161,6 +154,7 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
         }
     }
 
+    // TODO: Relaxed matching with delta is only applied to conservative matching for now
     public void performDetectionConservative() {
         /*
          * Let's start out simple by building a version that only works for signatures that do not span across multiple
@@ -187,10 +181,11 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
                  * won't have to be recomputed internally in each call to findSubsequenceInSequence().
                  */
                 Optional<List<PcapPacket>> match;
-                while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null)).
+                while ((match = findSubsequenceInSequence(signatureSequence, cPkts, mClusterMemberDirections, null, mDelta, mPacketSet)).
                         isPresent()) {
                     List<PcapPacket> matchSeq = match.get();
                     // Notify observers about the match.
+                    // Max number of skipped packets in layer 3 is 0 (no skipped packets)
                     mObservers.forEach(o -> o.onMatch(Layer3ClusterMatcher.this, matchSeq));
                     /*
                      * Get the index in cPkts of the last packet in the sequence of packets that matches the searched
@@ -317,6 +312,95 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
         return Optional.empty();
     }
 
+    /**
+     * Overload the same method with relaxed matching.
+     *
+     * @param subsequence The sequence to search for.
+     * @param sequence The sequence to search.
+     * @param subsequenceDirections The directions of packets in {@code subsequence} such that for all {@code i},
+     *                              {@code subsequenceDirections[i]} is the direction of the packet returned by
+     *                              {@code subsequence.get(i)}. May be set to {@code null}, in which this call will
+     *                              internally compute the packet directions.
+     * @param sequenceDirections The directions of packets in {@code sequence} such that for all {@code i},
+     *                           {@code sequenceDirections[i]} is the direction of the packet returned by
+     *                           {@code sequence.get(i)}. May be set to {@code null}, in which this call will internally
+     *                           compute the packet directions.
+     * @param delta The delta for relaxed matching
+     * @param packetSet The set of unique packet lengths, whose matching is to be relaxed
+     *
+     * @return An {@link Optional} containing the part of {@code sequence} that matches {@code subsequence}, or an empty
+     *         {@link Optional} if no part of {@code sequence} matches {@code subsequence}.
+     */
+    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
+                                                                 List<PcapPacket> sequence,
+                                                                 Conversation.Direction[] subsequenceDirections,
+                                                                 Conversation.Direction[] sequenceDirections,
+                                                                 int delta,
+                                                                 Set<Integer> packetSet) {
+        if (sequence.size() < subsequence.size()) {
+            // If subsequence is longer, it cannot be contained in sequence.
+            return Optional.empty();
+        }
+        if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
+            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
+            return Optional.empty();
+        }
+        // If packet directions have not been precomputed by calling code, we need to construct them.
+        if (subsequenceDirections == null) {
+            subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
+        }
+        if (sequenceDirections == null) {
+            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
+        }
+        int subseqIdx = 0;
+        int seqIdx = 0;
+        while (seqIdx < sequence.size()) {
+            PcapPacket subseqPkt = subsequence.get(subseqIdx);
+            PcapPacket seqPkt = sequence.get(seqIdx);
+            // We only have a match if packet lengths and directions match.
+            // Do relaxed matching here if applicable
+            if ((delta > 0 && packetSet.contains(subseqPkt.getOriginalLength()) &&
+                    subseqPkt.getOriginalLength() - delta <= seqPkt.getOriginalLength() &&
+                    seqPkt.getOriginalLength() <= subseqPkt.getOriginalLength() + delta &&
+                    subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) ||
+                    // Or just exact matching
+                    (subseqPkt.getOriginalLength() == seqPkt.getOriginalLength() &&
+                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx])) {
+                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
+                subseqIdx++;
+                seqIdx++;
+                if (subseqIdx == subsequence.size()) {
+                    // We managed to match the entire subsequence in sequence.
+                    // Return the sublist of sequence that matches subsequence.
+                    /*
+                     * TODO:
+                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
+                     * for live traces!
+                     */
+                    return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
+                }
+            } else {
+                // Mismatch.
+                if (subseqIdx > 0) {
+                    /*
+                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
+                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
+                     * leave seqIdx untouched.
+                     */
+                    subseqIdx = 0;
+                } else {
+                    /*
+                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
+                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
+                     * sequence.
+                     */
+                    seqIdx++;
+                }
+            }
+        }
+        return Optional.empty();
+    }
+
     /**
      * Overloading the method {@code findSubsequenceInSequence} for range-based matching. Instead of a sequence,
      * we have sequences of lower and upper bounds.
@@ -365,9 +449,16 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
             PcapPacket seqPkt = sequence.get(seqIdx);
             // We only have a match if packet lengths and directions match.
             // The packet lengths have to be in the range of [lowerBound - eps, upperBound+eps]
-            // TODO: Maybe we could do better here for the double to integer conversion?
-            int epsLowerBound = lowBndPkt.length() - (int) mEps;
-            int epsUpperBound = upBndPkt.length() + (int) mEps;
+            // We initialize the lower and upper bounds first
+            int epsLowerBound = lowBndPkt.length();
+            int epsUpperBound = upBndPkt.length();
+            // Do strict matching if the lower and upper bounds are the same length
+            // Do range matching with eps otherwise
+            if (epsLowerBound != epsUpperBound) {
+                // TODO: Maybe we could do better here for the double to integer conversion?
+                epsLowerBound = epsLowerBound - (int) mEps;
+                epsUpperBound = epsUpperBound + (int) mEps;
+            }
             if (epsLowerBound <= seqPkt.getOriginalLength() &&
                     seqPkt.getOriginalLength() <= epsUpperBound &&
                     subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
@@ -406,6 +497,138 @@ public class Layer3ClusterMatcher extends AbstractClusterMatcher implements Pack
         return Optional.empty();
     }
 
+    // TODO: EXPERIMENT WITH ONLY PACKET DIRECTION AND TIMING
+//    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> subsequence,
+//                                                                 List<PcapPacket> sequence,
+//                                                                 Conversation.Direction[] subsequenceDirections,
+//                                                                 Conversation.Direction[] sequenceDirections) {
+//        if (sequence.size() < subsequence.size()) {
+//            // If subsequence is longer, it cannot be contained in sequence.
+//            return Optional.empty();
+//        }
+//        if (isTlsSequence(subsequence) != isTlsSequence(sequence)) {
+//            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
+//            return Optional.empty();
+//        }
+//        // If packet directions have not been precomputed by calling code, we need to construct them.
+//        if (subsequenceDirections == null) {
+//            subsequenceDirections = getPacketDirections(subsequence, mRouterWanIp);
+//        }
+//        if (sequenceDirections == null) {
+//            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
+//        }
+//        int subseqIdx = 0;
+//        int seqIdx = 0;
+//        while (subseqIdx < subsequence.size() && seqIdx < sequence.size()) {
+//            // We only have a match if packet lengths and directions match.
+//            if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
+//                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
+//                subseqIdx++;
+//                seqIdx++;
+//                if (subseqIdx == subsequence.size()) {
+//                    // We managed to match the entire subsequence in sequence.
+//                    // Return the sublist of sequence that matches subsequence.
+//                    /*
+//                     * TODO:
+//                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
+//                     * for live traces!
+//                     */
+//                    // TODO: ALSO CHECK TIMING CONSTRAINT
+//                    PcapPacket firstPacket = sequence.get(seqIdx - subsequence.size());
+//                    PcapPacket lastPacket = sequence.get(seqIdx-1);
+//                    if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
+//                        return Optional.of(sequence.subList(seqIdx - subsequence.size(), seqIdx));
+//                    }
+//                }
+//            } else {
+//                // Mismatch.
+//                if (subseqIdx > 0) {
+//                    /*
+//                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
+//                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
+//                     * leave seqIdx untouched.
+//                     */
+//                    subseqIdx = 0;
+//                } else {
+//                    /*
+//                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
+//                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
+//                     * sequence.
+//                     */
+//                    seqIdx++;
+//                }
+//            }
+//        }
+//        return Optional.empty();
+//    }
+//
+//    private Optional<List<PcapPacket>> findSubsequenceInSequence(List<PcapPacket> lowerBound,
+//                                                                 List<PcapPacket> upperBound,
+//                                                                 List<PcapPacket> sequence,
+//                                                                 Conversation.Direction[] subsequenceDirections,
+//                                                                 Conversation.Direction[] sequenceDirections) {
+//        // Just do the checks for either lower or upper bound!
+//        // TODO: For now we use just the lower bound
+//        if (sequence.size() < lowerBound.size()) {
+//            // If subsequence is longer, it cannot be contained in sequence.
+//            return Optional.empty();
+//        }
+//        if (isTlsSequence(lowerBound) != isTlsSequence(sequence)) {
+//            // We consider it a mismatch if one is a TLS application data sequence and the other is not.
+//            return Optional.empty();
+//        }
+//        // If packet directions have not been precomputed by calling code, we need to construct them.
+//        if (subsequenceDirections == null) {
+//            subsequenceDirections = getPacketDirections(lowerBound, mRouterWanIp);
+//        }
+//        if (sequenceDirections == null) {
+//            sequenceDirections = getPacketDirections(sequence, mRouterWanIp);
+//        }
+//        int subseqIdx = 0;
+//        int seqIdx = 0;
+//        while (subseqIdx < lowerBound.size() && seqIdx < sequence.size()) {
+//            // TODO: ONLY MATCH PACKET DIRECTIONS
+//            if (subsequenceDirections[subseqIdx] == sequenceDirections[seqIdx]) {
+//                // A match; advance both indices to consider next packet in subsequence vs. next packet in sequence.
+//                subseqIdx++;
+//                seqIdx++;
+//                if (subseqIdx == lowerBound.size()) {
+//                    // We managed to match the entire subsequence in sequence.
+//                    // Return the sublist of sequence that matches subsequence.
+//                    /*
+//                     * TODO:
+//                     * ASSUMES THE BACKING LIST (i.e., 'sequence') IS _NOT_ STRUCTURALLY MODIFIED, hence may not work
+//                     * for live traces!
+//                     */
+//                    // TODO: ALSO CHECK TIMING CONSTRAINT
+//                    PcapPacket firstPacket = sequence.get(seqIdx - lowerBound.size());
+//                    PcapPacket lastPacket = sequence.get(seqIdx);
+//                    if (!lastPacket.getTimestamp().isAfter(firstPacket.getTimestamp().plusMillis(mInclusionTimeMillis))) {
+//                        return Optional.of(sequence.subList(seqIdx - lowerBound.size(), seqIdx));
+//                    }
+//                }
+//            } else {
+//                // Mismatch.
+//                if (subseqIdx > 0) {
+//                    /*
+//                     * If we managed to match parts of subsequence, we restart the search for subsequence in sequence at
+//                     * the index of sequence where the current mismatch occurred. I.e., we must reset subseqIdx, but
+//                     * leave seqIdx untouched.
+//                     */
+//                    subseqIdx = 0;
+//                } else {
+//                    /*
+//                     * First packet of subsequence didn't match packet at seqIdx of sequence, so we move forward in
+//                     * sequence, i.e., we continue the search for subsequence in sequence starting at index seqIdx+1 of
+//                     * sequence.
+//                     */
+//                    seqIdx++;
+//                }
+//            }
+//        }
+//        return Optional.empty();
+//    }
+
     /**
      * Given a cluster, produces a pruned version of that cluster. In the pruned version, there are no duplicate cluster
      * members. Two cluster members are considered identical if their packets lengths and packet directions are