1 package edu.uci.iotproject.detection.layer2;
import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassembler;
import edu.uci.iotproject.trafficreassembly.layer2.Layer2Flow;
import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowReassemblerObserver;
import edu.uci.iotproject.detection.AbstractClusterMatcher;
import edu.uci.iotproject.trafficreassembly.layer2.Layer2FlowObserver;
import org.pcap4j.core.*;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Function;
/**
 * Attempts to detect members of a cluster (packet sequence mutations) in layer 2 flows.
 *
 * @author Janus Varmarken {@literal <jvarmark@uci.edu>}
 * @author Rahmadi Trimananda {@literal <rtrimana@uci.edu>}
 */
24 public class Layer2ClusterMatcher extends AbstractClusterMatcher implements Layer2FlowReassemblerObserver, Layer2FlowObserver {
27 * Maps from a flow to a table of {@link Layer2SequenceMatcher}s for that particular flow. The table {@code t} is
28 * structured such that {@code t[i][j]} is a {@link Layer2SequenceMatcher} that attempts to match member {@code i}
29 * of {@link #mCluster} and has so far matched {@code j} packets of that particular sequence.
31 private final Map<Layer2Flow, Layer2SequenceMatcher[][]> mPerFlowSeqMatchers = new HashMap<>();
32 // private final Map<Layer2Flow, Layer2RangeMatcher[]> mPerFlowRangeMatcher = new HashMap<>();
33 private final Map<Layer2Flow, List<Layer2RangeMatcher>> mPerFlowRangeMatcher = new HashMap<>();
35 private final Function<Layer2Flow, Boolean> mFlowFilter;
38 * Specifying range-based instead of conservative exact matching.
40 private final boolean mRangeBased;
43 * Epsilon value used by the DBSCAN algorithm; it is used again for range-based matching here.
45 private final double mEps;
47 private int mInclusionTimeMillis;
/**
 * Create a new {@link Layer2ClusterMatcher} that looks for occurrences of {@code cluster}'s members in every flow
 * (i.e., no flow filtering is applied).
 *
 * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
 * @param inclusionTimeMillis Packet inclusion time limit for matching.
 * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
 * @param eps The epsilon value used in the DBSCAN algorithm.
 */
public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, int inclusionTimeMillis,
        boolean isRangeBased, double eps) {
    // Delegate with an accept-everything filter so that all flows are searched.
    this(cluster, anyFlow -> true, inclusionTimeMillis, isRangeBased, eps);
}
/**
 * Create a new {@link Layer2ClusterMatcher} that attempts to find occurrences of {@code cluster}'s members.
 *
 * @param cluster The sequence mutations that the new {@link Layer2ClusterMatcher} should search for.
 * @param flowFilter A filter that defines what {@link Layer2Flow}s the new {@link Layer2ClusterMatcher} should
 *                   search for {@code cluster}'s members in. If {@code flowFilter} returns {@code true}, the flow
 *                   will be included (searched). Note that {@code flowFilter} is only queried once for each flow,
 *                   namely when the {@link Layer2FlowReassembler} notifies the {@link Layer2ClusterMatcher} about
 *                   the new flow. This functionality may for example come in handy when one only wants to search
 *                   for matches in the subset of flows that involves a specific (range of) MAC(s).
 * @param inclusionTimeMillis Packet inclusion time limit for matching. A value of {@code 0} selects
 *                            {@link TriggerTrafficExtractor#INCLUSION_WINDOW_MILLIS}.
 * @param isRangeBased The boolean that decides if it is range-based vs. strict matching.
 * @param eps The epsilon value used in the DBSCAN algorithm.
 */
public Layer2ClusterMatcher(List<List<PcapPacket>> cluster, Function<Layer2Flow, Boolean> flowFilter,
        int inclusionTimeMillis, boolean isRangeBased, double eps) {
    super(cluster, isRangeBased);
    mFlowFilter = flowFilter;
    mRangeBased = isRangeBased;
    // The epsilon must be stored: it is a final field and is forwarded to every Layer2RangeMatcher created later.
    mEps = eps;
    // Zero means "not specified": fall back to the project-wide default inclusion window.
    mInclusionTimeMillis =
            inclusionTimeMillis == 0 ? TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS : inclusionTimeMillis;
}
83 public void onNewPacket(Layer2Flow flow, PcapPacket newPacket) {
85 rangeBasedMatching(flow, newPacket);
87 conservativeMatching(flow, newPacket);
91 private void conservativeMatching(Layer2Flow flow, PcapPacket newPacket) {
92 if (mPerFlowSeqMatchers.get(flow) == null) {
93 // If this is the first time we encounter this flow, we need to set up sequence matchers for it.
94 // All sequences of the cluster have the same length, so we only need to compute the length of the nested
95 // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
96 // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
97 // around), so the length of the inner array is simply the sequence length.
98 Layer2SequenceMatcher[][] matchers = new Layer2SequenceMatcher[mCluster.size()][mCluster.get(0).size()];
99 // Prepare a "state 0" sequence matcher for each sequence variation in the cluster.
100 for (int i = 0; i < matchers.length; i++) {
101 matchers[i][0] = new Layer2SequenceMatcher(mCluster.get(i), mInclusionTimeMillis);
103 // Associate the new sequence matcher table with the new flow
104 mPerFlowSeqMatchers.put(flow, matchers);
106 // Fetch table that contains sequence matchers for this flow.
107 Layer2SequenceMatcher[][] matchers = mPerFlowSeqMatchers.get(flow);
108 // Present the packet to all sequence matchers.
109 for (int i = 0; i < matchers.length; i++) {
110 // Present packet to the sequence matchers that has advanced the most first. This is to prevent discarding
111 // the sequence matchers that have advanced the most in the special case where the searched sequence
112 // contains two packets of the same length going in the same direction.
113 for (int j = matchers[i].length - 1; j >= 0 ; j--) {
114 Layer2SequenceMatcher sm = matchers[i][j];
116 // There is currently no sequence matcher that has managed to match j packets.
119 boolean matched = sm.matchPacket(newPacket);
121 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
122 // Sequence matcher has a match. Report it to observers.
123 mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
124 // Remove the now terminated sequence matcher.
125 matchers[i][j] = null;
127 // Sequence matcher advanced one step, so move it to its corresponding new position iff the
128 // packet that advanced it has a later timestamp than that of the last matched packet of the
129 // sequence matcher at the new index, if any. In most traces, a small amount of the packets
130 // appear out of order (with regards to their timestamp), which is why this check is required.
131 // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
133 if (matchers[i][j+1] == null ||
134 newPacket.getTimestamp().isAfter(matchers[i][j+1].getLastPacket().getTimestamp())) {
135 matchers[i][j+1] = sm;
138 // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
139 // from state zero completed its matching or if it replaced a different one in state 1 or not.
140 if (sm.getMatchedPacketsCount() == 1) {
141 matchers[i][j] = new Layer2SequenceMatcher(sm.getTargetSequence(), mInclusionTimeMillis);
148 private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
149 // TODO: For range-based matching, we need to create a new matcher every time we see the first element of
150 // the sequence (between lower and upper bounds).
151 if (mPerFlowRangeMatcher.get(flow) == null) {
152 // If this is the first time we encounter this flow, we need to set up a list of sequence matchers.
153 List<Layer2RangeMatcher> listMatchers = new ArrayList<>();
154 // Prepare a "state 0" sequence matcher.
155 Layer2RangeMatcher matcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
156 mInclusionTimeMillis, mEps);
157 listMatchers.add(matcher);
158 // Associate the new sequence matcher table with the new flow.
159 mPerFlowRangeMatcher.put(flow, listMatchers);
161 // Fetch table that contains sequence matchers for this flow.
162 List<Layer2RangeMatcher> listMatchers = mPerFlowRangeMatcher.get(flow);
163 // Add a new matcher if all matchers have already advanced to the next stage.
164 // We always need a new matcher to match from NO packets.
165 boolean addOneArray = true;
166 for(Layer2RangeMatcher matcher : listMatchers) {
167 if (matcher.getMatchedPacketsCount() == 0) {
171 // Add the new matcher into the list
173 Layer2RangeMatcher newMatcher = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1),
174 mInclusionTimeMillis, mEps);
175 listMatchers.add(newMatcher);
177 // Present packet to the sequence matchers.
178 // Make a shallow copy of the list so that we can clean up the actual list when a matcher is terminated
179 List<Layer2RangeMatcher> listMatchersCopy = new ArrayList<>(listMatchers);
180 for(Layer2RangeMatcher matcher : listMatchersCopy) {
181 Layer2RangeMatcher sm = matcher;
182 // Check if no packets are matched yet or if there are matched packets, the next packet to be matched
183 // has to be later than the last matched packet.
184 // In most traces, a small amount of the packets appear out of order (with regards to their timestamp),
185 // which is why this check is required.
186 // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
188 if (sm.getMatchedPacketsCount() == 0 ||
189 newPacket.getTimestamp().isAfter(sm.getLastPacket().getTimestamp())) {
190 boolean matched = sm.matchPacket(newPacket);
192 if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
193 // Sequence matcher has a match. Report it to observers.
194 mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
195 // Terminate sequence matcher since matching is complete.
196 listMatchers.remove(matcher);
203 // private void rangeBasedMatching(Layer2Flow flow, PcapPacket newPacket) {
204 // // TODO: For range-based matching, we only care about matching a range; therefore it is a matcher array.
205 // if (mPerFlowRangeMatcher.get(flow) == null) {
206 // // If this is the first time we encounter this flow, we need to set up a sequence matcher.
207 // // All sequences of the cluster have the same length, so we only need to compute the length of the
208 // // arrays once. We want to make room for a cluster matcher in each state, including the initial empty state
209 // // but excluding the final "full match" state (as there is no point in keeping a terminated sequence matcher
210 // // around), so the length of the array is simply the sequence length.
211 // Layer2RangeMatcher[] matcher = new Layer2RangeMatcher[mCluster.get(0).size()];
212 // // Prepare a "state 0" sequence matcher.
213 // matcher[0] = new Layer2RangeMatcher(mCluster.get(0), mCluster.get(1), mInclusionTimeMillis, mEps);
214 // // Associate the new sequence matcher table with the new flow.
215 // mPerFlowRangeMatcher.put(flow, matcher);
217 // // Fetch table that contains sequence matchers for this flow.
218 // Layer2RangeMatcher[] matcher = mPerFlowRangeMatcher.get(flow);
219 // // Present packet to the sequence matcher.
220 // for (int j = matcher.length - 1; j >= 0; j--) {
221 // Layer2RangeMatcher sm = matcher[j];
223 // // There is currently no sequence matcher that has managed to match j packets.
226 // boolean matched = sm.matchPacket(newPacket);
228 // // TODO: DEBUGGING
229 // long timeStamp = newPacket.getTimestamp().getEpochSecond();
230 // if (339 == newPacket.length() && timeStamp == 1542297773) {
231 // System.out.println("Timestamp of length 339: " + newPacket.getTimestamp().getEpochSecond());
232 // int length = matcher.length;
234 // if (329 == newPacket.length() && timeStamp == 1542297773) {
235 // System.out.println("Timestamp of length 329: " + newPacket.getTimestamp().getEpochSecond());
237 // if (364 <= newPacket.length() && newPacket.length() <= 365 && timeStamp == 1542297773) {
238 // System.out.println("Timestamp of length 364-365: " + newPacket.getTimestamp().getEpochSecond());
240 // if (1061 <= newPacket.length() && newPacket.length() <= 1070 && timeStamp == 1542297773) {
241 // System.out.println("Timestamp of length 1061-1070: " + newPacket.getTimestamp().getEpochSecond());
243 // // TODO: DEBUGGING
246 // if (sm.getMatchedPacketsCount() == sm.getTargetSequencePacketCount()) {
247 // // Sequence matcher has a match. Report it to observers.
248 // mObservers.forEach(o -> o.onMatch(this, sm.getMatchedPackets()));
249 // // Remove the now terminated sequence matcher.
250 // matcher[j] = null;
252 // // Sequence matcher advanced one step, so move it to its corresponding new position iff the
253 // // packet that advanced it has a later timestamp than that of the last matched packet of the
254 // // sequence matcher at the new index, if any. In most traces, a small amount of the packets
255 // // appear out of order (with regards to their timestamp), which is why this check is required.
256 // // Obviously it would not be needed if packets where guaranteed to be processed in timestamp
258 // if (matcher[j+1] == null ||
259 // newPacket.getTimestamp().isAfter(matcher[j+1].getLastPacket().getTimestamp())) {
260 // matcher[j+1] = sm;
261 // if (matcher[j+1].getTargetUpperBound().size() == 4 && matcher[j+1].mMatchedPackets.size() > 1) {
262 // System.out.println("Got here");
266 // // We always want to have a sequence matcher in state 0, regardless of if the one that advanced
267 // // from state zero completed its matching or if it replaced a different one in state 1 or not.
268 // if (sm.getMatchedPacketsCount() == 1) {
269 // matcher[j] = new Layer2RangeMatcher(sm.getTargetLowerBound(), sm.getTargetUpperBound(),
270 // mInclusionTimeMillis, mEps);
277 protected List<List<PcapPacket>> pruneCluster(List<List<PcapPacket>> cluster) {
278 // Note: we assume that all sequences in the input cluster are of the same length and that their packet
279 // directions are identical.
280 List<List<PcapPacket>> prunedCluster = new ArrayList<>();
281 for (List<PcapPacket> originalClusterSeq : cluster) {
282 boolean alreadyPresent = prunedCluster.stream().anyMatch(pcPkts -> {
283 for (int i = 0; i < pcPkts.size(); i++) {
284 if (pcPkts.get(i).getOriginalLength() != originalClusterSeq.get(i).getOriginalLength()) {
290 if (!alreadyPresent) {
291 // Add the sequence if not already present in the pruned cluster.
292 prunedCluster.add(originalClusterSeq);
295 return prunedCluster;
298 private static final boolean DEBUG = false;
301 public void onNewFlow(Layer2FlowReassembler reassembler, Layer2Flow newFlow) {
302 // New flow detected. Check if we should consider it when searching for cluster member matches.
303 if (mFlowFilter.apply(newFlow)) {
305 System.out.println(">>> ACCEPTING FLOW: " + newFlow + " <<<");
307 // Subscribe to the new flow to get updates whenever a new packet pertaining to the flow is processed.
308 newFlow.addFlowObserver(this);
310 System.out.println(">>> IGNORING FLOW: " + newFlow + " <<<");