Code/Projects/PacketLevelSignatureExtractor/src/main/java/edu/uci/iotproject/detection/layer2/Layer2ClusterMatcher.java

   1 package edu.uci.iotproject.detection.layer2;
   2
   3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
   4 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
   5 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
   6 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
   7 import edu.uci.iotproject.detection.AbstractClusterMatcher;
   8 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
   9 import org.pcap4j.core.*;
  10
  11 import java.util.ArrayList;
  12 import java.util.HashMap;
  13 import java.util.List;
  14 import java.util.Map;
  15 import java.util.function.Function;
  16
  17 /**
  18  * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
  19  *
  20  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
  21  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
  22  */
  23 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
  24
  25     /**
  26      * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
  27      * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
  28      * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
  29      */
  30     private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
  31     private final Map<Layer2Flow, List<Layer2RangeMatcher>> mPerFlowRangeMatcher = new HashMap<>();
  32
  33     private final Function<Layer2Flow, Boolean> mFlowFilter;
  34
  35     /**
  36      * Specifying range-based instead of conservative exact matching.
  37      */
  38     private final boolean mRangeBased;
  39
  40     /**
  41      * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
  42      */
  43     private final double mEps;
  44
  45     private int mInclusionTimeMillis;
  46
  47     /**
  48      * Keeping track of maximum number of skipped packets
  49      */
  50     private int mMaxSkippedPackets;
  51     private List<Integer> mSkippedPackets;
  52
  53     private int mLimitSkippedPackets;
  54
  55     /**
  56      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
  57      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
  58      */
  59     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, int inclusionTimeMillis,
  60                                 boolean isRangeBased, double eps, int limitSkippedPackets) {
  61         // Consider all flows if no flow filter specified.
  62         this(cluster, flow -> true, inclusionTimeMillis, isRangeBased, eps, limitSkippedPackets);
  63     }
  64
  65     /**
  66      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
  67      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
  68      * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
  69      *                   search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
  70      *                   will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
  71      *                   namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
  72      *                   the new flow. This functionality may for example come in handy when one only wants to search
  73      *                   for matches in the subset of flows that involves a specific (range of) MAC(s).
  74      * @param inclusionTimeMillis Packet inclusion time limit for matching.
  75      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
  76      * @param eps The epsilon value used in the DBSCAN algorithm.
  77      */
  78     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
  79                                 int inclusionTimeMillis, boolean isRangeBased, double eps, int limitSkippedPackets) {
  80         super(cluster, isRangeBased);
  81         mFlowFilter = flowFilter;
  82         mRangeBased = isRangeBased;
  83         mEps = eps;
  84         mInclusionTimeMillis =
  85                 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
  86         mMaxSkippedPackets = 0;
  87         mSkippedPackets = new ArrayList<>();
  88         // Give integer's MAX_VALUE if -1
  89         mLimitSkippedPackets = limitSkippedPackets == -1 ? Integer.MAX_VALUE : limitSkippedPackets;
  90     }
  91
  92     @Override
  93     public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
  94         if (mRangeBased) {
  95             rangeBasedMatching(flow, newPacket);
  96         } else {
  97             conservativeMatching(flow, newPacket);
  98         }
  99     }
 100
 101     private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
 102         if (mPerFlowSeqMatchers.get(flow) == null) {
 103             // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
 104             // All sequences of the cluster have the same length, so we only need to compute the length of the nested
 105             // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
 106             // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
 107             // around), so the length of the inner array is simply the sequence length.
 108             Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
 109             // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
 110             for (int i = 0; i < matchers.length; i++) {
 111                 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis);
 112             }
 113             // Associate the new sequence matcher table with the new flow
 114             mPerFlowSeqMatchers.put(flow, matchers);
 115         }
 116         // Fetch table that contains sequence matchers for this flow.
 117         Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
 118         // Present the packet to all sequence matchers.
 119         for (int i = 0; i < matchers.length; i++) {
 120             // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
 121             // the sequence matchers that have advanced the most in the special case where the searched sequence
 122             // contains two packets of the same length going in the same direction.
 123             for (int j = matchers[i].length - 1; j >= 0 ; j--) {
 124                 Layer2SequenceMatcher sm = matchers[i][j];
 125                 if (sm == null) {
 126                     // There is currently no sequence matcher that has managed to match j packets.
 127                     continue;
 128                 }
 129                 boolean matched = sm.matchPacket(newPacket);
 130                 if (matched) {
 131                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 132                         // Update maximum skipped packets
 133                         boolean stillMatch = checkMaxSkippedPackets(flow.getPackets(), sm.getMatchedPackets());
 134                         if (stillMatch) {
 135                             // Sequence matcher has a match. Report it to observers.
 136                             mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 137                         }
 138                         // Remove the now terminated sequence matcher.
 139                         matchers[i][j] = null;
 140                     } else {
 141                         // Sequence matcher advanced one step, so move it to its corresponding new position iff the
 142                         // packet that advanced it has a later timestamp than that of the last matched packet of the
 143                         // sequence matcher at the new index, if any. In most traces, a small amount of the packets
 144                         // appear out of order (with regards to their timestamp), which is why this check is required.
 145                         // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 146                         // order here.
 147                         if (matchers[i][j+1] == null ||
 148                                 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
 149                             matchers[i][j+1] = sm;
 150                         }
 151                     }
 152                     // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
 153                     // from state zero completed its matching or if it replaced a different one in state 1 or not.
 154                     if (sm.getMatchedPacketsCount() == 1) {
 155                         matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis);
 156                     }
 157                 }
 158             }
 159         }
 160     }
 161
 162     // Update the maximum number of skipped packets.
 163     private boolean checkMaxSkippedPackets(List<PcapPacket> flowPackets, List<PcapPacket> matchedPackets) {
 164         // Count number of skipped packets by looking into
 165         // the difference of indices of two matched packets.
 166         boolean stillMatch = true;
 167         for(int i = 1; i < matchedPackets.size(); ++i) {
 168             int currIndex = flowPackets.indexOf(matchedPackets.get(i-1));
 169             int nextIndex = flowPackets.indexOf(matchedPackets.get(i));
 170             int skippedPackets = nextIndex - currIndex;
 171             if (mMaxSkippedPackets < skippedPackets) {
 172                 mMaxSkippedPackets = skippedPackets;
 173             }
 174             if (mLimitSkippedPackets < skippedPackets) {
 175                 stillMatch = false;
 176             }
 177             mSkippedPackets.add(skippedPackets);
 178         }
 179         return stillMatch;
 180     }
 181
 182     private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
 183         // TODO: For range-based matching, we need to create a new matcher every time we see the first element of
 184         //  the sequence (between lower and upper bounds).
 185         if (mPerFlowRangeMatcher.get(flow) == null) {
 186             // If this is the first time we encounter this flow, we need to set up a list of sequence matchers.
 187             List<Layer2RangeMatcher> listMatchers = new ArrayList<>();
 188             // Prepare a "state 0" sequence matcher.
 189             Layer2RangeMatcher matcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
 190                     mInclusionTimeMillis, mEps);
 191             listMatchers.add(matcher);
 192             // Associate the new sequence matcher table with the new flow.
 193             mPerFlowRangeMatcher.put(flow, listMatchers);
 194         }
 195         // Fetch table that contains sequence matchers for this flow.
 196         List<Layer2RangeMatcher> listMatchers = mPerFlowRangeMatcher.get(flow);
 197         // Add a new matcher if all matchers have already advanced to the next stage.
 198         // We always need a new matcher to match from NO packets.
 199         boolean addOneArray = true;
 200         for(Layer2RangeMatcher matcher : listMatchers) {
 201             if (matcher.getMatchedPacketsCount() == 0) {
 202                 addOneArray = false;
 203             }
 204         }
 205         // Add the new matcher into the list
 206         if (addOneArray) {
 207             Layer2RangeMatcher newMatcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
 208                     mInclusionTimeMillis, mEps);
 209             listMatchers.add(newMatcher);
 210         }
 211         // Present packet to the sequence matchers.
 212         // Make a shallow copy of the list so that we can clean up the actual list when a matcher is terminated.
 213         // Otherwise, we would get an exception for changing the list while iterating on it.
 214         List<Layer2RangeMatcher> listMatchersCopy = new ArrayList<>(listMatchers);
 215         Layer2RangeMatcher previousMatcher = null;
 216         for(Layer2RangeMatcher matcher : listMatchersCopy) {
 217             Layer2RangeMatcher sm = matcher;
 218             // Check if no packets are matched yet or if there are matched packets, the next packet to be matched
 219             // has to be later than the last matched packet.
 220             // In most traces, a small amount of the packets appear out of order (with regards to their timestamp),
 221             // which is why this check is required.
 222             // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
 223             // order here.
 224             if (sm.getMatchedPacketsCount() == 0 ||
 225                     newPacket.getTimestamp().isAfter(sm.getLastPacket().getTimestamp())) {
 226                 boolean matched = sm.matchPacket(newPacket);
 227                 if (matched) {
 228                     // BUG: found on May 29, 2019
 229                     // We need to remove a previous match if the current match is later in time.
 230                     // This is done only if we have matched at least 1 packet (we are about to match the second or
 231                     // later packets) and we match for the same packet position in the signature (check the size!).
 232                     if (previousMatcher != null && sm.getMatchedPacketsCount() > 1 &&
 233                             previousMatcher.getMatchedPacketsCount() == sm.getMatchedPacketsCount()) {
 234                         List<PcapPacket> previouslyMatchedPackets = previousMatcher.getMatchedPackets();
 235                         List<PcapPacket> currentlyMatchedPackets = sm.getMatchedPackets();
 236                         // We need to check 1 packet before the last matched packet from the previous matcher.
 237                         int packetIndexToCheck = (sm.getMatchedPacketsCount() - 1) - 1;
 238                         if (currentlyMatchedPackets.get(packetIndexToCheck).getTimestamp().
 239                             isAfter(previouslyMatchedPackets.get(packetIndexToCheck).getTimestamp())) {
 240                             listMatchers.remove(previousMatcher);
 241                         }
 242                     }
 243                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
 244                         // Update maximum skipped packets
 245                         boolean stillMatch = checkMaxSkippedPackets(flow.getPackets(), sm.getMatchedPackets());
 246                         if (stillMatch) {
 247                             // Sequence matcher has a match. Report it to observers.
 248                             mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
 249                         }
 250                         // Terminate sequence matcher since matching is complete.
 251                         listMatchers.remove(matcher);
 252                     }
 253                     previousMatcher = sm;
 254                 }
 255             }
 256         }
 257     }
 258
 259     @Override
 260     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
 261         // Note: we assume that all sequences in the input cluster are of the same length and that their packet
 262         // directions are identical.
 263         List<List<PcapPacket>> prunedCluster = new ArrayList<>();
 264         for (List<PcapPacket> originalClusterSeq : cluster) {
 265             boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
 266                 for (int i = 0; i < pcPkts.size(); i++) {
 267                     if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
 268                         return false;
 269                     }
 270                 }
 271                 return true;
 272             });
 273             if (!alreadyPresent) {
 274                 // Add the sequence if not already present in the pruned cluster.
 275                 prunedCluster.add(originalClusterSeq);
 276             }
 277         }
 278         return prunedCluster;
 279     }
 280
 281     private static final boolean DEBUG = false;
 282
 283     @Override
 284     public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
 285         // New flow detected. Check if we should consider it when searching for cluster member matches.
 286         if (mFlowFilter.apply(newFlow)) {
 287             if (DEBUG) {
 288                 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
 289             }
 290             // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
 291             newFlow.addFlowObserver(this);
 292         } else if (DEBUG) {
 293             System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");
 294         }
 295     }
 296
 297     /**
 298       * Return the maximum number of skipped packets.
 299       */
 300     public int getMaxSkippedPackets() {
 301        return mMaxSkippedPackets;
 302     }
 303
 304     /**
 305      * Return the numbers of skipped packets.
 306      */
 307     public List<Integer> getSkippedPackets() {
 308         return mSkippedPackets;
 309     }
 310 }