Code/Projects/PacketLevelSignatureExtractor/src/main/java/edu/uci/iotproject/detection/layer2/Layer2ClusterMatcher.java

   1 package edu.uci.iotproject.detection.layer2;
   2
   3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
   4 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
   5 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
   6 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
   7 import edu.uci.iotproject.detection.AbstractClusterMatcher;
   8 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
   9 import org.pcap4j.core.*;
  10
  11 import java.util.ArrayList;
  12 import java.util.HashMap;
  13 import java.util.List;
  14 import java.util.Map;
  15 import java.util.concurrent.CopyOnWriteArrayList;
  16 import java.util.function.Function;
  17
  18 /**
  19  * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
  20  *
  21  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
  22  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
  23  */
  24 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
  25
  26     /**
  27      * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
  28      * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
  29      * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
  30      */
  31     private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
  32 //    private final Map<Layer2Flow, Layer2RangeMatcher[]> mPerFlowRangeMatcher = new HashMap<>();
  33     private final Map<Layer2Flow, List<Layer2RangeMatcher>> mPerFlowRangeMatcher = new HashMap<>();
  34
  35     private final Function<Layer2Flow, Boolean> mFlowFilter;
  36
  37     /**
  38      * Specifying range-based instead of conservative exact matching.
  39      */
  40     private final boolean mRangeBased;
  41
  42     /**
  43      * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
  44      */
  45     private final double mEps;
  46
  47     private int mInclusionTimeMillis;
  48
  49     /**
  50      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
  51      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
  52      */
  53     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, int inclusionTimeMillis,
  54                                 boolean isRangeBased, double eps) {
  55         // Consider all flows if no flow filter specified.
  56         this(cluster, flow -> true, inclusionTimeMillis, isRangeBased, eps);
  57     }
  58
  59     /**
  60      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
  61      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
  62      * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
  63      *                   search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
  64      *                   will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
  65      *                   namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
  66      *                   the new flow. This functionality may for example come in handy when one only wants to search
  67      *                   for matches in the subset of flows that involves a specific (range of) MAC(s).
  68      * @param inclusionTimeMillis Packet inclusion time limit for matching.
  69      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
  70      * @param eps The epsilon value used in the DBSCAN algorithm.
  71      */
  72     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
  73                                 int inclusionTimeMillis, boolean isRangeBased, double eps) {
  74         super(cluster, isRangeBased);
  75         mFlowFilter = flowFilter;
  76         mRangeBased = isRangeBased;
  77         mEps = eps;
  78         mInclusionTimeMillis =
  79                 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
  80     }
  81
  82     @Override
  83     public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
  84         if (mRangeBased) {
  85             rangeBasedMatching(flow, newPacket);
  86         } else {
  87             conservativeMatching(flow, newPacket);
  88         }
  89     }
  90
  91     private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
  92         if (mPerFlowSeqMatchers.get(flow) == null) {
  93             // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
  94             // All sequences of the cluster have the same length, so we only need to compute the length of the nested
  95             // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
  96             // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
  97             // around), so the length of the inner array is simply the sequence length.
  98             Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
  99             // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
 100             for (int i = 0; i < matchers.length; i++) {
 101                 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis);
 102             }
 103             // Associate the new sequence matcher table with the new flow
 104             mPerFlowSeqMatchers.put(flow, matchers);
 105         }
 106         // Fetch table that contains sequence matchers for this flow.
 107         Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
 108         // Present the packet to all sequence matchers.
 109         for (int i = 0; i < matchers.length; i++) {
 110             // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
 111             // the sequence matchers that have advanced the most in the special case where the searched sequence
 112             // contains two packets of the same length going in the same direction.
 113             for (int j = matchers[i].length - 1; j >= 0 ; j--) {
 114                 Layer2SequenceMatcher sm = matchers[i][j];
 115                 if (sm == null) {
 116                     // There is currently no sequence matcher that has managed to match j packets.
 117                     continue;
 118                 }
 119                 boolean matched = sm.matchPacket(newPacket);
 120                 if (matched) {
 121                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 122                         // Sequence matcher has a match. Report it to observers.
 123                         mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 124                         // Remove the now terminated sequence matcher.
 125                         matchers[i][j] = null;
 126                     } else {
 127                         // Sequence matcher advanced one step, so move it to its corresponding new position iff the
 128                         // packet that advanced it has a later timestamp than that of the last matched packet of the
 129                         // sequence matcher at the new index, if any. In most traces, a small amount of the packets
 130                         // appear out of order (with regards to their timestamp), which is why this check is required.
 131                         // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 132                         // order here.
 133                         if (matchers[i][j+1] == null ||
 134                                 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
 135                             matchers[i][j+1] = sm;
 136                         }
 137                     }
 138                     // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
 139                     // from state zero completed its matching or if it replaced a different one in state 1 or not.
 140                     if (sm.getMatchedPacketsCount() == 1) {
 141                         matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis);
 142                     }
 143                 }
 144             }
 145         }
 146     }
 147
 148     private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
 149         // TODO: For range-based matching, we need to create a new matcher every time we see the first element of
 150         //  the sequence (between lower and upper bounds).
 151         if (mPerFlowRangeMatcher.get(flow) == null) {
 152             // If this is the first time we encounter this flow, we need to set up a list of sequence matchers.
 153             List<Layer2RangeMatcher> listMatchers = new ArrayList<>();
 154             // Prepare a "state 0" sequence matcher.
 155             Layer2RangeMatcher matcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
 156                     mInclusionTimeMillis, mEps);
 157             listMatchers.add(matcher);
 158             // Associate the new sequence matcher table with the new flow.
 159             mPerFlowRangeMatcher.put(flow, listMatchers);
 160         }
 161         // Fetch table that contains sequence matchers for this flow.
 162         List<Layer2RangeMatcher> listMatchers = mPerFlowRangeMatcher.get(flow);
 163         // Add a new matcher if all matchers have already advanced to the next stage.
 164         // We always need a new matcher to match from NO packets.
 165         boolean addOneArray = true;
 166         for(Layer2RangeMatcher matcher : listMatchers) {
 167             if (matcher.getMatchedPacketsCount() == 0) {
 168                 addOneArray = false;
 169             }
 170         }
 171         // Add the new matcher into the list
 172         if (addOneArray) {
 173             Layer2RangeMatcher newMatcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
 174                     mInclusionTimeMillis, mEps);
 175             listMatchers.add(newMatcher);
 176         }
 177         // Present packet to the sequence matchers.
 178         // Make a shallow copy of the list so that we can clean up the actual list when a matcher is terminated
 179         List<Layer2RangeMatcher> listMatchersCopy = new ArrayList<>(listMatchers);
 180         for(Layer2RangeMatcher matcher : listMatchersCopy) {
 181             Layer2RangeMatcher sm = matcher;
 182             // Check if no packets are matched yet or if there are matched packets, the next packet to be matched
 183             // has to be later than the last matched packet.
 184             // In most traces, a small amount of the packets appear out of order (with regards to their timestamp),
 185             // which is why this check is required.
 186             // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 187             // order here.
 188             if (sm.getMatchedPacketsCount() == 0 ||
 189                     newPacket.getTimestamp().isAfter(sm.getLastPacket().getTimestamp())) {
 190                 boolean matched = sm.matchPacket(newPacket);
 191                 if (matched) {
 192                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 193                         // Sequence matcher has a match. Report it to observers.
 194                         mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 195                         // Terminate sequence matcher since matching is complete.
 196                         listMatchers.remove(matcher);
 197                     }
 198                 }
 199             }
 200         }
 201     }
 202
 203 //    private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
 204 //        // TODO: For range-based matching, we only care about matching a range; therefore it is a matcher array.
 205 //        if (mPerFlowRangeMatcher.get(flow) == null) {
 206 //            // If this is the first time we encounter this flow, we need to set up a sequence matcher.
 207 //            // All sequences of the cluster have the same length, so we only need to compute the length of the
 208 //            // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
 209 //            // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
 210 //            // around), so the length of the array is simply the sequence length.
 211 //            Layer2RangeMatcher[] matcher = new Layer2RangeMatcher[mCluster.get(0).size()];
 212 //            // Prepare a "state 0" sequence matcher.
 213 //            matcher[0] = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1), mInclusionTimeMillis, mEps);
 214 //            // Associate the new sequence matcher table with the new flow.
 215 //            mPerFlowRangeMatcher.put(flow, matcher);
 216 //        }
 217 //        // Fetch table that contains sequence matchers for this flow.
 218 //        Layer2RangeMatcher[] matcher = mPerFlowRangeMatcher.get(flow);
 219 //        // Present packet to the sequence matcher.
 220 //        for (int j = matcher.length - 1; j >= 0; j--) {
 221 //            Layer2RangeMatcher sm = matcher[j];
 222 //            if (sm == null) {
 223 //                // There is currently no sequence matcher that has managed to match j packets.
 224 //                continue;
 225 //            }
 226 //            boolean matched = sm.matchPacket(newPacket);
 227 //
 228 //            // TODO: DEBUGGING
 229 //            long timeStamp = newPacket.getTimestamp().getEpochSecond();
 230 //            if (339 == newPacket.length() && timeStamp == 1542297773) {
 231 //                System.out.println("Timestamp of length 339: " + newPacket.getTimestamp().getEpochSecond());
 232 //                int length = matcher.length;
 233 //            }
 234 //            if (329 == newPacket.length() && timeStamp == 1542297773) {
 235 //                System.out.println("Timestamp of length 329: " + newPacket.getTimestamp().getEpochSecond());
 236 //            }
 237 //            if (364 <= newPacket.length() && newPacket.length() <= 365 && timeStamp == 1542297773) {
 238 //                System.out.println("Timestamp of length 364-365: " + newPacket.getTimestamp().getEpochSecond());
 239 //            }
 240 //            if (1061 <= newPacket.length() && newPacket.length() <= 1070 && timeStamp == 1542297773) {
 241 //                System.out.println("Timestamp of length 1061-1070: " + newPacket.getTimestamp().getEpochSecond());
 242 //            }
 243 //            // TODO: DEBUGGING
 244 //
 245 //            if (matched) {
 246 //                if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 247 //                    // Sequence matcher has a match. Report it to observers.
 248 //                    mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 249 //                    // Remove the now terminated sequence matcher.
 250 //                    matcher[j] = null;
 251 //                } else {
 252 //                    // Sequence matcher advanced one step, so move it to its corresponding new position iff the
 253 //                    // packet that advanced it has a later timestamp than that of the last matched packet of the
 254 //                    // sequence matcher at the new index, if any. In most traces, a small amount of the packets
 255 //                    // appear out of order (with regards to their timestamp), which is why this check is required.
 256 //                    // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 257 //                    // order here.
 258 //                    if (matcher[j+1] == null ||
 259 //                            newPacket.getTimestamp().isAfter(matcher[j+1].getLastPacket().getTimestamp())) {
 260 //                        matcher[j+1] = sm;
 261 //                        if (matcher[j+1].getTargetUpperBound().size() == 4 && matcher[j+1].mMatchedPackets.size() > 1) {
 262 //                            System.out.println("Got here");
 263 //                        }
 264 //                    }
 265 //                }
 266 //                // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
 267 //                // from state zero completed its matching or if it replaced a different one in state 1 or not.
 268 //                if (sm.getMatchedPacketsCount() == 1) {
 269 //                    matcher[j] = new Layer2RangeMatcher(sm.getTargetLowerBound(), sm.getTargetUpperBound(),
 270 //                            mInclusionTimeMillis, mEps);
 271 //                }
 272 //            }
 273 //        }
 274 //    }
 275
 276     @Override
 277     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
 278         // Note: we assume that all sequences in the input cluster are of the same length and that their packet
 279         // directions are identical.
 280         List<List<PcapPacket>> prunedCluster = new ArrayList<>();
 281         for (List<PcapPacket> originalClusterSeq : cluster) {
 282             boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
 283                 for (int i = 0; i < pcPkts.size(); i++) {
 284                     if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
 285                         return false;
 286                     }
 287                 }
 288                 return true;
 289             });
 290             if (!alreadyPresent) {
 291                 // Add the sequence if not already present in the pruned cluster.
 292                 prunedCluster.add(originalClusterSeq);
 293             }
 294         }
 295         return prunedCluster;
 296     }
 297
 298     private static final boolean DEBUG = false;
 299
 300     @Override
 301     public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
 302         // New flow detected. Check if we should consider it when searching for cluster member matches.
 303         if (mFlowFilter.apply(newFlow)) {
 304             if (DEBUG) {
 305                 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
 306             }
 307             // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
 308             newFlow.addFlowObserver(this);
 309         } else if (DEBUG) {
 310             System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");
 311         }
 312     }
 313 }