Implementing relaxed matching for layer 2 and layer 3.
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / detection / layer2 / Layer2ClusterMatcher.java
1 package edu.uci.iotproject.detection.layer2;
2
3 import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
4 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
5 import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
6 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
7 import edu.uci.iotproject.detection.AbstractClusterMatcher;
8 import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
9 import org.pcap4j.core.*;
10
11 import java.util.*;
12 import java.util.function.Function;
13
14 /**
15  * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
16  *
17  * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
18  * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
19  */
20 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
21
22     /**
23      * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
24      * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
25      * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
26      */
27     private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
28     private final Map<Layer2Flow, List<Layer2RangeMatcher>> mPerFlowRangeMatcher = new HashMap<>();
29
30     private final Function<Layer2Flow, Boolean> mFlowFilter;
31
32     /**
33      * Specifying range-based instead of conservative exact matching.
34      */
35     private final boolean mRangeBased;
36
37     /**
38      * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
39      */
40     private final double mEps;
41
42     private int mInclusionTimeMillis;
43
44     /**
45      * Keeping track of maximum number of skipped packets
46      */
47     private int mMaxSkippedPackets;
48     private List<Integer> mSkippedPackets;
49
50     private int mLimitSkippedPackets;
51
52     /**
53      * Router's WLAN MAC.
54      */
55     private String mTrainingRouterWlanMac;
56     private String mRouterWlanMac;
57
58     /**
59      * Relaxed matching
60      */
61     private int mDelta;
62     private Set<Integer> mPacketSet;
63
64     /**
65      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
66      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
67      */
68     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, String trainingRouterWlanMac, String routerWlanMac, int inclusionTimeMillis,
69                                 boolean isRangeBased, double eps, int limitSkippedPackets, int delta, Set<Integer> packetSet) {
70         // Consider all flows if no flow filter specified.
71         this(cluster, trainingRouterWlanMac, routerWlanMac, flow -> true, inclusionTimeMillis, isRangeBased, eps,
72                 limitSkippedPackets, delta, packetSet);
73     }
74
75     /**
76      * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
77      * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
78      * @param trainingRouterWlanMac The training router's WLAN MAC (used for determining the direction of packets).
79      * @param routerWlanMac The target trace router's WLAN MAC (used for determining the direction of packets).
80      * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
81      *                   search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
82      *                   will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
83      *                   namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
84      *                   the new flow. This functionality may for example come in handy when one only wants to search
85      *                   for matches in the subset of flows that involves a specific (range of) MAC(s).
86      * @param inclusionTimeMillis Packet inclusion time limit for matching.
87      * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
88      * @param eps The epsilon value used in the DBSCAN algorithm.
89      */
90     public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, String trainingRouterWlanMac, String routerWlanMac,
91                                 Function<Layer2Flow, Boolean> flowFilter, int inclusionTimeMillis, boolean isRangeBased,
92                                 double eps, int limitSkippedPackets, int delta, Set<Integer> packetSet) {
93         super(cluster, isRangeBased);
94         mFlowFilter = flowFilter;
95         mTrainingRouterWlanMac = trainingRouterWlanMac;
96         mRouterWlanMac = routerWlanMac;
97         mRangeBased = isRangeBased;
98         mEps = eps;
99         mInclusionTimeMillis =
100                 inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
101         mMaxSkippedPackets = 0;
102         mSkippedPackets = new ArrayList<>();
103         // Give integer's MAX_VALUE if -1
104         mLimitSkippedPackets = limitSkippedPackets == -1 ? Integer.MAX_VALUE : limitSkippedPackets;
105         mDelta = delta;
106         mPacketSet = packetSet;
107     }
108
109     @Override
110     public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
111         if (mRangeBased) {
112             rangeBasedMatching(flow, newPacket);
113         } else {
114             conservativeMatching(flow, newPacket);
115         }
116     }
117
118     // TODO: Relaxed matching is applied in conservative matching
119     private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
120         if (mPerFlowSeqMatchers.get(flow) == null) {
121             // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
122             // All sequences of the cluster have the same length, so we only need to compute the length of the nested
123             // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
124             // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
125             // around), so the length of the inner array is simply the sequence length.
126             Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
127             // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
128             for (int i = 0; i < matchers.length; i++) {
129                 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis, mTrainingRouterWlanMac,
130                         mRouterWlanMac, mDelta, mPacketSet);
131             }
132             // Associate the new sequence matcher table with the new flow
133             mPerFlowSeqMatchers.put(flow, matchers);
134         }
135         // Fetch table that contains sequence matchers for this flow.
136         Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
137         // Present the packet to all sequence matchers.
138         for (int i = 0; i < matchers.length; i++) {
139             // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
140             // the sequence matchers that have advanced the most in the special case where the searched sequence
141             // contains two packets of the same length going in the same direction.
142             for (int j = matchers[i].length - 1; j >= 0 ; j--) {
143                 Layer2SequenceMatcher sm = matchers[i][j];
144                 if (sm == null) {
145                     // There is currently no sequence matcher that has managed to match j packets.
146                     continue;
147                 }
148                 boolean matched = sm.matchPacket(newPacket);
149                 if (matched) {
150                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
151                         // Update maximum skipped packets
152                         boolean stillMatch = checkMaxSkippedPackets(flow.getPackets(), sm.getMatchedPackets());
153                         if (stillMatch) {
154                             // Sequence matcher has a match. Report it to observers.
155                             mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
156                         }
157                         // Remove the now terminated sequence matcher.
158                         matchers[i][j] = null;
159                     } else {
160                         // Sequence matcher advanced one step, so move it to its corresponding new position iff the
161                         // packet that advanced it has a later timestamp than that of the last matched packet of the
162                         // sequence matcher at the new index, if any. In most traces, a small amount of the packets
163                         // appear out of order (with regards to their timestamp), which is why this check is required.
164                         // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
165                         // order here.
166                         if (matchers[i][j+1] == null ||
167                                 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
168                             matchers[i][j+1] = sm;
169                         }
170                     }
171                     // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
172                     // from state zero completed its matching or if it replaced a different one in state 1 or not.
173                     if (sm.getMatchedPacketsCount() == 1) {
174                         matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis,
175                                 mTrainingRouterWlanMac, mRouterWlanMac, mDelta, mPacketSet);
176                     }
177                 }
178             }
179         }
180     }
181
182     // Update the maximum number of skipped packets.
183     private boolean checkMaxSkippedPackets(List<PcapPacket> flowPackets, List<PcapPacket> matchedPackets) {
184         // Count number of skipped packets by looking into
185         // the difference of indices of two matched packets.
186         boolean stillMatch = true;
187         for(int i = 1; i < matchedPackets.size(); ++i) {
188             int currIndex = flowPackets.indexOf(matchedPackets.get(i-1));
189             int nextIndex = flowPackets.indexOf(matchedPackets.get(i));
190             int skippedPackets = nextIndex - currIndex;
191             if (mMaxSkippedPackets < skippedPackets) {
192                 mMaxSkippedPackets = skippedPackets;
193             }
194             if (mLimitSkippedPackets < skippedPackets) {
195                 stillMatch = false;
196             }
197             mSkippedPackets.add(skippedPackets);
198         }
199         return stillMatch;
200     }
201
202     private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
203         // TODO: For range-based matching, we need to create a new matcher every time we see the first element of
204         //  the sequence (between lower and upper bounds).
205         if (mPerFlowRangeMatcher.get(flow) == null) {
206             // If this is the first time we encounter this flow, we need to set up a list of sequence matchers.
207             List<Layer2RangeMatcher> listMatchers = new ArrayList<>();
208             // Prepare a "state 0" sequence matcher.
209             Layer2RangeMatcher matcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
210                     mInclusionTimeMillis, mEps, mTrainingRouterWlanMac, mRouterWlanMac);
211             listMatchers.add(matcher);
212             // Associate the new sequence matcher table with the new flow.
213             mPerFlowRangeMatcher.put(flow, listMatchers);
214         }
215         // Fetch table that contains sequence matchers for this flow.
216         List<Layer2RangeMatcher> listMatchers = mPerFlowRangeMatcher.get(flow);
217         // Add a new matcher if all matchers have already advanced to the next stage.
218         // We always need a new matcher to match from NO packets.
219         boolean addOneArray = true;
220         for(Layer2RangeMatcher matcher : listMatchers) {
221             if (matcher.getMatchedPacketsCount() == 0) {
222                 addOneArray = false;
223             }
224         }
225         // Add the new matcher into the list
226         if (addOneArray) {
227             Layer2RangeMatcher newMatcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
228                     mInclusionTimeMillis, mEps, mTrainingRouterWlanMac, mRouterWlanMac);
229             listMatchers.add(newMatcher);
230         }
231         // Present packet to the sequence matchers.
232         // Make a shallow copy of the list so that we can clean up the actual list when a matcher is terminated.
233         // Otherwise, we would get an exception for changing the list while iterating on it.
234         List<Layer2RangeMatcher> listMatchersCopy = new ArrayList<>(listMatchers);
235         Layer2RangeMatcher previousMatcher = null;
236         for(Layer2RangeMatcher matcher : listMatchersCopy) {
237             Layer2RangeMatcher sm = matcher;
238             // Check if no packets are matched yet or if there are matched packets, the next packet to be matched
239             // has to be later than the last matched packet.
240             // In most traces, a small amount of the packets appear out of order (with regards to their timestamp),
241             // which is why this check is required.
242             // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
243             // order here.
244             if (sm.getMatchedPacketsCount() == 0 ||
245                     newPacket.getTimestamp().isAfter(sm.getLastPacket().getTimestamp())) {
246                 boolean matched = sm.matchPacket(newPacket);
247                 if (matched) {
248                     // BUG: found on May 29, 2019
249                     // We need to remove a previous match if the current match is later in time.
250                     // This is done only if we have matched at least 1 packet (we are about to match the second or
251                     // later packets) and we match for the same packet position in the signature (check the size!).
252                     if (previousMatcher != null && sm.getMatchedPacketsCount() > 1 &&
253                             previousMatcher.getMatchedPacketsCount() == sm.getMatchedPacketsCount()) {
254                         List<PcapPacket> previouslyMatchedPackets = previousMatcher.getMatchedPackets();
255                         List<PcapPacket> currentlyMatchedPackets = sm.getMatchedPackets();
256                         // We need to check 1 packet before the last matched packet from the previous matcher.
257                         int packetIndexToCheck = (sm.getMatchedPacketsCount() - 1) - 1;
258                         if (currentlyMatchedPackets.get(packetIndexToCheck).getTimestamp().
259                             isAfter(previouslyMatchedPackets.get(packetIndexToCheck).getTimestamp())) {
260                             listMatchers.remove(previousMatcher);
261                         }
262                     }
263                     if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
264                         // Update maximum skipped packets
265                         boolean stillMatch = checkMaxSkippedPackets(flow.getPackets(), sm.getMatchedPackets());
266                         if (stillMatch) {
267                             // Sequence matcher has a match. Report it to observers.
268                             mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
269                         }
270                         // Terminate sequence matcher since matching is complete.
271                         listMatchers.remove(matcher);
272                     }
273                     previousMatcher = sm;
274                 }
275             }
276         }
277     }
278
279     @Override
280     protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
281         // Note: we assume that all sequences in the input cluster are of the same length and that their packet
282         // directions are identical.
283         List<List<PcapPacket>> prunedCluster = new ArrayList<>();
284         for (List<PcapPacket> originalClusterSeq : cluster) {
285             boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
286                 for (int i = 0; i < pcPkts.size(); i++) {
287                     if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
288                         return false;
289                     }
290                 }
291                 return true;
292             });
293             if (!alreadyPresent) {
294                 // Add the sequence if not already present in the pruned cluster.
295                 prunedCluster.add(originalClusterSeq);
296             }
297         }
298         return prunedCluster;
299     }
300
301     private static final boolean DEBUG = false;
302
303     @Override
304     public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
305         // New flow detected. Check if we should consider it when searching for cluster member matches.
306         if (mFlowFilter.apply(newFlow)) {
307             if (DEBUG) {
308                 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
309             }
310             // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
311             newFlow.addFlowObserver(this);
312         } else if (DEBUG) {
313             System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");
314         }
315     }
316
317     /**
318       * Return the maximum number of skipped packets.
319       */
320     public int getMaxSkippedPackets() {
321        return mMaxSkippedPackets;
322     }
323
324     /**
325      * Return the numbers of skipped packets.
326      */
327     public List<Integer> getSkippedPackets() {
328         return mSkippedPackets;
329     }
330 }