Main.java: add code for exploring the number of false positives for sequence alignment.
TcpConversationUtils.java: move implementation of getPacketLengthSequence(Conversation) to this class from SequenceExtraction as it is a generally useful util method.
import edu.uci.iotproject.analysis.TrafficLabeler;
import edu.uci.iotproject.analysis.TriggerTrafficExtractor;
import edu.uci.iotproject.analysis.UserAction;
+import edu.uci.iotproject.comparison.seqalignment.ExtractedSequence;
+import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment;
+import edu.uci.iotproject.comparison.seqalignment.SequenceExtraction;
import edu.uci.iotproject.io.TriggerTimesFileReader;
import org.pcap4j.core.*;
import org.pcap4j.packet.namednumber.DataLinkType;
// ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------
// Paths to input and output files (consider supplying these as arguments instead) and IP of the device for
// which traffic is to be extracted:
- String path = "/scratch/July-2018"; // Rahmadi
- //String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
+// String path = "/scratch/July-2018"; // Rahmadi
+ String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
// 1) D-Link July 26 experiment
// final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
// 2b) TP-Link July 25 experiment TRUNCATED:
// Only contains "true local" events, i.e., before the behavior changes to remote-like behavior.
// Last included event is at July 25 10:38:11; file filtered to only include packets with arrival time <= 10:38:27.
-// final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap";
-// final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap";
-// final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps";
-// final String deviceIp = "192.168.1.159";
+ final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.truncated.pcap";
+ final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.truncated.pcap";
+ final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.truncated.timestamps";
+ final String deviceIp = "192.168.1.159";
// 3) SmartThings Plug July 25 experiment
// final String inputPcapFile = path + "/2018-07/stplug/stplug.wlan1.local.pcap";
// final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps";
// final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
// August 17
- final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap";
- final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap";
- final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps";
- final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
+// final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap";
+// final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap";
+// final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps";
+// final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
TriggerTimesFileReader ttfr = new TriggerTimesFileReader();
List<Instant> triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false);
});
});
+ // ================================================================================================
+ // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
+ //
+ // Currently need to know relevant hostname in advance :(
+ String hostname = "events.tplinkra.com";
+ // Conversations with 'hostname' for ON events.
+ List<Conversation> onsForHostname = new ArrayList<>();
+ // Conversations with 'hostname' for OFF events.
+ List<Conversation> offsForHostname = new ArrayList<>();
+ // "Unwrap" sequence groupings in ons/offs maps.
+ ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
+ offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
+ // Extract representative sequence for ON and OFF by providing the list of conversations with
+ // 'hostname' observed for each event type (the training data).
+ SequenceExtraction seqExtraction = new SequenceExtraction();
+ ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
+ ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
+ // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
+ // labeled).
+ int onsLabeledAsOff = 0;
+ Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
+ Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
+ SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
+ for (Conversation c : onsForHostname) {
+ Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
+ if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
+ onsLabeledAsOff++;
+ }
+ }
+ int offsLabeledAsOn = 0;
+ for (Conversation c : offsForHostname) {
+ Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
+ if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
+ offsLabeledAsOn++;
+ }
+ }
System.out.println("");
+ // ================================================================================================
+
// -------------------------------------------------------------------------------------------------------------
// -------------------------------------------------------------------------------------------------------------
+++ /dev/null
-package edu.uci.iotproject;
-
-import edu.uci.iotproject.comparison.seqalignment.AlignmentPricer;
-import edu.uci.iotproject.comparison.seqalignment.SequenceAlignment;
-import org.pcap4j.core.PcapPacket;
-
-import java.util.List;
-import java.util.Map;
-
-/**
- * TODO add class documentation.
- *
- * @author Janus Varmarken
- */
-public class SequenceExtraction {
-
-
- private final SequenceAlignment<Integer> mAlignmentAlg;
-
-
- public SequenceExtraction() {
- mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10));
- }
-
-
- public SequenceExtraction(SequenceAlignment<Integer> alignmentAlgorithm) {
- mAlignmentAlg = alignmentAlgorithm;
- }
-
- // Initial
-// /**
-// *
-// * @param convsForAction A set of {@link Conversation}s known to be associated with a single type of user action.
-// */
-// public void extract(List<Conversation> convsForAction) {
-// int maxDifference = 0;
-//
-// for (int i = 0; i < convsForAction.size(); i++) {
-// for (int j = i+1; j < convsForAction.size(); i++) {
-// Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
-// Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
-// int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
-// if (alignmentCost > maxDifference) {
-// maxDifference = alignmentCost;
-// }
-// }
-// }
-//
-// }
-
-
-// public void extract(Map<String, List<Conversation>> hostnameToConvs) {
-// int maxDifference = 0;
-//
-// for (int i = 0; i < convsForAction.size(); i++) {
-// for (int j = i+1; j < convsForAction.size(); i++) {
-// Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
-// Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
-// int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
-// if (alignmentCost > maxDifference) {
-// maxDifference = alignmentCost;
-// }
-// }
-// }
-//
-// }
-
- private Integer[] getPacketLengthSequence(Conversation c) {
- List<PcapPacket> packets = c.getPackets();
- Integer[] packetLengthSequence = new Integer[packets.size()];
- for (int i = 0; i < packetLengthSequence.length; i++) {
- packetLengthSequence[i] = packets.get(i).length();
- }
- return packetLengthSequence;
- }
-}
return result;
}
+ /**
+  * Builds the packet length sequence of a {@link Conversation}.
+  * <p>
+  * The result holds one entry per element of {@code c.getPackets()}, in the iteration order of that list; each
+  * entry is the value of {@code getOriginalLength()} for the corresponding packet.
+  * <p>
+  * NOTE(review): earlier documentation described the packets as payload-carrying and ordered by timestamp; that
+  * depends on what {@code Conversation#getPackets()} returns -- confirm against {@link Conversation}.
+  *
+  * @param c The {@link Conversation} from which a packet length sequence is to be extracted.
+  * @return An {@code Integer[]} with the original length of each packet returned by {@code c.getPackets()}.
+  */
+ public static Integer[] getPacketLengthSequence(Conversation c) {
+     return c.getPackets()
+             .stream()
+             .map(PcapPacket::getOriginalLength)
+             .toArray(Integer[]::new);
+ }
+
/**
* Appends a space to {@code sb} <em>iff</em> {@code sb} already contains some content.
* @param sb A {@link StringBuilder} that should have a space appended <em>iff</em> it is not empty.
--- /dev/null
+package edu.uci.iotproject.comparison.seqalignment;
+
+import edu.uci.iotproject.Conversation;
+
+/**
+ * Immutable holder that pairs a representative {@link Conversation} with an alignment cost bound and a
+ * human-readable rendering of the conversation's packet lengths.
+ * <p>
+ * Instances are produced by {@link SequenceExtraction#extract(java.util.List)}, which passes the maximum cost of
+ * aligning the representative conversation with the other conversations it was given.
+ *
+ * @author Janus Varmarken
+ */
+public class ExtractedSequence {
+
+    /** The conversation selected as the representative sequence. */
+    private final Conversation mRepresentativeSequence;
+
+    /** The alignment cost bound supplied at construction time. */
+    private final int mMaxAlignmentCost;
+
+    /** Space-separated original packet lengths of the representative conversation; built once in the constructor. */
+    private final String mSequenceString;
+
+    /**
+     * Creates a new {@code ExtractedSequence}.
+     *
+     * @param sequence The representative {@link Conversation}; its packets are read once to build the string
+     *                 returned by {@link #getSequenceString()}.
+     * @param maxAlignmentCost The alignment cost bound to associate with {@code sequence}.
+     */
+    public ExtractedSequence(Conversation sequence, int maxAlignmentCost) {
+        mRepresentativeSequence = sequence;
+        mMaxAlignmentCost = maxAlignmentCost;
+        StringBuilder sb = new StringBuilder();
+        sequence.getPackets().forEach(p -> {
+            // Separate consecutive lengths by a single space; no leading or trailing separator.
+            if (sb.length() != 0) {
+                sb.append(" ");
+            }
+            sb.append(p.getOriginalLength());
+        });
+        mSequenceString = sb.toString();
+    }
+
+    /**
+     * Gets the representative conversation.
+     * @return the {@link Conversation} passed to the constructor.
+     */
+    public Conversation getRepresentativeSequence() {
+        return mRepresentativeSequence;
+    }
+
+    /**
+     * Gets the alignment cost bound associated with the representative conversation.
+     * @return the cost passed to the constructor.
+     */
+    public int getMaxAlignmentCost() {
+        return mMaxAlignmentCost;
+    }
+
+    /**
+     * Gets the textual form of the representative sequence. Previously this string was computed but not exposed.
+     * @return the original packet lengths of the representative conversation, separated by single spaces; the
+     *         empty string if the conversation had no packets at construction time.
+     */
+    public String getSequenceString() {
+        return mSequenceString;
+    }
+
+}
--- /dev/null
+package edu.uci.iotproject.comparison.seqalignment;
+
+import edu.uci.iotproject.Conversation;
+import edu.uci.iotproject.analysis.TcpConversationUtils;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Extracts a representative packet length sequence from a list of {@link Conversation}s and determines the
+ * maximum cost of aligning that representative with the remaining conversations in the list.
+ *
+ * @author Janus Varmarken
+ */
+public class SequenceExtraction {
+
+
+    /** Algorithm used to compute the cost of aligning two packet length sequences. */
+    private final SequenceAlignment<Integer> mAlignmentAlg;
+
+
+    /**
+     * Creates a {@code SequenceExtraction} that uses a default {@link SequenceAlignment}. Its {@link AlignmentPricer}
+     * is built from the absolute difference between two packet lengths and a function that always yields 10.
+     * NOTE(review): the constant 10 is presumably the cost of a gap -- confirm against {@link AlignmentPricer}.
+     */
+    public SequenceExtraction() {
+        mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10));
+    }
+
+
+    /**
+     * Creates a {@code SequenceExtraction} that uses the given alignment algorithm.
+     * @param alignmentAlgorithm The {@link SequenceAlignment} used when computing alignment costs.
+     */
+    public SequenceExtraction(SequenceAlignment<Integer> alignmentAlgorithm) {
+        mAlignmentAlg = alignmentAlgorithm;
+    }
+
+    /**
+     * Gets the {@link SequenceAlignment} used to perform the sequence extraction.
+     * @return the {@link SequenceAlignment} used to perform the sequence extraction.
+     */
+    public SequenceAlignment<Integer> getAlignmentAlgorithm() {
+        return mAlignmentAlg;
+    }
+
+    /**
+     * Picks a representative conversation from {@code convsForActionForHostname} and computes the maximum cost of
+     * aligning it with every other conversation in that list.
+     * <p>
+     * The conversations are grouped via {@link TcpConversationUtils#groupConversationsByPacketSequence}; the first
+     * conversation of the largest group becomes the representative. When several groups share the largest size, the
+     * one whose first conversation has the most packets wins.
+     *
+     * @param convsForActionForHostname The conversations to extract a representative sequence from; must be
+     *                                  non-null and non-empty.
+     * @return An {@link ExtractedSequence} holding the representative conversation and the maximum alignment cost
+     *         between it and the other conversations in {@code convsForActionForHostname} (0 if there are none).
+     * @throws IllegalArgumentException if {@code convsForActionForHostname} is {@code null} or empty.
+     * @throws IllegalStateException if grouping the conversations produced no representative.
+     */
+    public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
+        if (convsForActionForHostname == null || convsForActionForHostname.isEmpty()) {
+            // Without this guard no representative is found and the code below fails with an opaque
+            // NullPointerException.
+            throw new IllegalArgumentException(
+                    "Cannot extract a representative sequence from a null or empty list of conversations.");
+        }
+        // First group conversations by packet sequences.
+        // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
+        // as it can potentially result in sequences that are equal in terms of payload packets to be considered
+        // different due to differences in how they are terminated.
+        Map<String, List<Conversation>> groupedBySequence =
+                TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
+        // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
+        Conversation mostFrequentConv = null;
+        int maxFrequency = 0;
+        for (List<Conversation> group : groupedBySequence.values()) {
+            if (group.isEmpty()) {
+                // Nothing to pick a representative from.
+                continue;
+            }
+            // We just pick the first conversation as the representative conversation for this sequence type.
+            Conversation candidate = group.get(0);
+            if (group.size() > maxFrequency) {
+                // Found a more frequent sequence.
+                maxFrequency = group.size();
+                mostFrequentConv = candidate;
+            } else if (group.size() == maxFrequency
+                    && candidate.getPackets().size() > mostFrequentConv.getPackets().size()) {
+                // Same frequency as the max frequency seen so far: break ties by choosing the longest sequence.
+                mostFrequentConv = candidate;
+            }
+        }
+        if (mostFrequentConv == null) {
+            throw new IllegalStateException("Grouping conversations by packet sequence yielded no conversations.");
+        }
+        // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with
+        // each of the other conversations also associated with this action and hostname.
+        int maxCost = 0;
+        final Integer[] mostFrequentConvSeq = TcpConversationUtils.getPacketLengthSequence(mostFrequentConv);
+        for (Conversation c : convsForActionForHostname) {
+            if (c == mostFrequentConv) {
+                // Don't compute distance to self.
+                continue;
+            }
+            Integer[] cSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            maxCost = Math.max(maxCost, mAlignmentAlg.calculateAlignment(mostFrequentConvSeq, cSeq));
+        }
+        return new ExtractedSequence(mostFrequentConv, maxCost);
+    }
+
+}