Trying to be more strict by applying the signature duration constraint at packet...
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / detection / layer2 / Layer2ClusterMatcher.java
index 5021c31..a3f4d0e 100644 (file)
@@ -1,5 +1,6 @@
 package edu.uci.iotproject.detection.layer2;
 
+import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
@@ -27,16 +28,30 @@ public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Laye
      * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
      */
     private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
+    private final Map<Layer2Flow, Layer2RangeMatcher[]> mPerFlowRangeMatcher = new HashMap<>();
 
     private final Function<Layer2Flow, Boolean> mFlowFilter;
 
+    /**
+     * Specifying range-based instead of conservative exact matching.
+     */
+    private final boolean mRangeBased;
+
+    /**
+     * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
+     */
+    private final double mEps;
+
+    private int mInclusionTimeMillis;
+
     /**
      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
      */
-    public Layer2ClusterMatcher(List<List<PcapPacket>> cluster) {
+    public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, int inclusionTimeMillis,
+                                boolean isRangeBased, double eps) {
         // Consider all flows if no flow filter specified.
-        this(cluster, flow -> true);
+        this(cluster, flow -> true, inclusionTimeMillis, isRangeBased, eps);
     }
 
     /**
@@ -48,14 +63,30 @@ public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Laye
      *                   namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
      *                   the new flow. This functionality may for example come in handy when one only wants to search
      *                   for matches in the subset of flows that involves a specific (range of) MAC(s).
+     * @param inclusionTimeMillis Packet inclusion time limit for matching.
+     * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
+     * @param eps The epsilon value used in the DBSCAN algorithm.
      */
-    public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter) {
-        super(cluster);
+    public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
+                                int inclusionTimeMillis, boolean isRangeBased, double eps) {
+        super(cluster, isRangeBased);
         mFlowFilter = flowFilter;
+        mRangeBased = isRangeBased;
+        mEps = eps;
+        mInclusionTimeMillis =
+                inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
     }
 
     @Override
     public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
+        if (mRangeBased) {
+            rangeBasedMatching(flow, newPacket);
+        } else {
+            conservativeMatching(flow, newPacket);
+        }
+    }
+
+    private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
         if (mPerFlowSeqMatchers.get(flow) == null) {
             // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
             // All sequences of the cluster have the same length, so we only need to compute the length of the nested
@@ -65,7 +96,7 @@ public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Laye
             Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
             // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
             for (int i = 0; i < matchers.length; i++) {
-                matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i));
+                matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis);
             }
             // Associate the new sequence matcher table with the new flow
             mPerFlowSeqMatchers.put(flow, matchers);
@@ -105,13 +136,64 @@ public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Laye
                     // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
                     // from state zero completed its matching or if it replaced a different one in state 1 or not.
                     if (sm.getMatchedPacketsCount() == 1) {
-                        matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence());
+                        matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis);
                     }
                 }
             }
         }
     }
 
+    private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
+        // TODO: For range-based matching, we only care about matching a range; therefore it is a matcher array.
+        if (mPerFlowRangeMatcher.get(flow) == null) {
+            // If this is the first time we encounter this flow, we need to set up a sequence matcher.
+            // All sequences of the cluster have the same length, so we only need to compute the length of the
+            // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
+            // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
+            // around), so the length of the array is simply the sequence length.
+            Layer2RangeMatcher[] matcher = new Layer2RangeMatcher[mCluster.get(0).size()];
+            // Prepare a "state 0" sequence matcher.
+            matcher[0] = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1), mInclusionTimeMillis, mEps);
+            // Associate the new sequence matcher table with the new flow.
+            mPerFlowRangeMatcher.put(flow, matcher);
+        }
+        // Fetch table that contains sequence matchers for this flow.
+        Layer2RangeMatcher[] matcher = mPerFlowRangeMatcher.get(flow);
+        // Present packet to the sequence matcher.
+        for (int j = matcher.length - 1; j >= 0; j--) {
+            Layer2RangeMatcher sm = matcher[j];
+            if (sm == null) {
+                // There is currently no sequence matcher that has managed to match j packets.
+                continue;
+            }
+            boolean matched = sm.matchPacket(newPacket);
+            if (matched) {
+                if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
+                    // Sequence matcher has a match. Report it to observers.
+                    mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
+                    // Remove the now terminated sequence matcher.
+                    matcher[j] = null;
+                } else {
+                    // Sequence matcher advanced one step, so move it to its corresponding new position iff the
+                    // packet that advanced it has a later timestamp than that of the last matched packet of the
+                    // sequence matcher at the new index, if any. In most traces, a small amount of the packets
+                    // appear out of order (with regards to their timestamp), which is why this check is required.
+                    // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
+                    // order here.
+                    if (matcher[j+1] == null ||
+                            newPacket.getTimestamp().isAfter(matcher[j+1].getLastPacket().getTimestamp())) {
+                        matcher[j+1] = sm;
+                    }
+                }
+                // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
+                // from state zero completed its matching or if it replaced a different one in state 1 or not.
+                if (sm.getMatchedPacketsCount() == 1) {
+                    matcher[j] = new Layer2RangeMatcher(sm.getTargetLowerBound(), sm.getTargetUpperBound(),
+                            mInclusionTimeMillis, mEps);
+                }
+            }
+        }
+    }
 
     @Override
     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {