From 2317de543977ae3e1e3a8da254614602a429810c Mon Sep 17 00:00:00 2001 From: rtrimana Date: Wed, 5 Sep 2018 17:52:37 -0700 Subject: [PATCH] Adding some changes related to the second batch of experiments to cluster pairs. --- .../main/java/edu/uci/iotproject/Main.java | 132 +++++++++--------- python_ml/plotting-dbscan-complete.py | 10 +- python_ml/plotting-dbscan.py | 4 +- 3 files changed, 72 insertions(+), 74 deletions(-) diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java index 98b4c27..be9c34d 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java @@ -39,8 +39,8 @@ public class Main { // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------ // Paths to input and output files (consider supplying these as arguments instead) and IP of the device for // which traffic is to be extracted: -// String path = "/scratch/July-2018"; // Rahmadi - String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus + String path = "/scratch/July-2018"; // Rahmadi +// String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus boolean verbose = true; final String onPairsPath = "/scratch/July-2018/on.txt"; final String offPairsPath = "/scratch/July-2018/off.txt"; @@ -52,10 +52,10 @@ public class Main { // final String deviceIp = "192.168.1.246"; // .246 == phone; .199 == dlink plug? // 2) TP-Link July 25 experiment - final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.pcap"; - final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.pcap"; - final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.timestamps"; - final String deviceIp = "192.168.1.159"; +// final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.pcap"; +// final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.pcap"; +// final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.timestamps"; +// final String deviceIp = "192.168.1.159"; // 2b) TP-Link July 25 experiment TRUNCATED: // Only contains "true local" events, i.e., before the behavior changes to remote-like behavior. @@ -105,7 +105,7 @@ public class Main { // final String inputPcapFile = path + "/2018-08/lifx-bulb/lifx-bulb.wlan1.local.pcap"; // final String outputPcapFile = path + "/2018-08/lifx-bulb/lifx-bulb-processed.pcap"; // final String triggerTimesFile = path + "/2018-08/lifx-bulb/lifx-bulb-aug-8-2018.timestamps"; -// final String deviceIp = "192.168.1.231"; // .246 == phone; .231 == Lifx +// final String deviceIp = "192.168.1.246"; // .246 == phone; .231 == Lifx // 10) Amcrest Camera August 9 experiment // final String inputPcapFile = path + "/2018-08/amcrest-camera/amcrest-camera.wlan1.local.pcap"; @@ -143,10 +143,10 @@ public class Main { // final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps"; // final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa // August 17 -// final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap"; -// final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap"; -// final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps"; -// final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa + final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap"; + final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap"; + final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps"; + final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa TriggerTimesFileReader ttfr = new TriggerTimesFileReader(); List triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false); @@ -273,7 +273,6 @@ public class Main { // The length of the first packet int firstLen = 0; for (PcapPacket pcap : packetList) { - System.out.println(pcap.length() + ", " + conv.getDirection(pcap)); boolean isPair = false; if (count % 2 == 0) { firstDir = conv.getDirection(pcap); @@ -282,8 +281,8 @@ public class Main { if(conv.getDirection(pcap) != firstDir) { isPair = true; pwOn.println(firstLen + ", " + pcap.length()); - //System.out.println(firstDir + ", " + conv.getDirection(pcap)); - //System.out.println(firstLen + ", " + pcap.length()); + System.out.println(firstDir + ", " + conv.getDirection(pcap)); + System.out.println(firstLen + ", " + pcap.length()); } } count++; @@ -351,7 +350,6 @@ public class Main { // The length of the first packet int firstLen = 0; for (PcapPacket pcap : packetList) { - System.out.println(pcap.length() + ", " + conv.getDirection(pcap)); boolean isPair = false; if (count % 2 == 0) { firstDir = conv.getDirection(pcap); @@ -377,58 +375,58 @@ public class Main { pwOff.close(); - // ================================================================================================ - // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>> - // - // Currently need to know relevant hostname in advance :( - String hostname = "events.tplinkra.com"; -// String hostname = "rfe-us-west-1.dch.dlink.com"; - // Conversations with 'hostname' for ON events. - List onsForHostname = new ArrayList<>(); - // Conversations with 'hostname' for OFF events. - List offsForHostname = new ArrayList<>(); - // "Unwrap" sequence groupings in ons/offs maps. - ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v)); - offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v)); - - - Map> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname); - - - // Extract representative sequence for ON and OFF by providing the list of conversations with - // 'hostname' observed for each event type (the training data). - SequenceExtraction seqExtraction = new SequenceExtraction(); -// ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname); -// ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname); - - ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname); - ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname); - - // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly - // labeled). - int onsLabeledAsOff = 0; - Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence()); - Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence()); - SequenceAlignment seqAlg = seqExtraction.getAlignmentAlgorithm(); - for (Conversation c : onsForHostname) { - Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c); - if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) { - onsLabeledAsOff++; - } - } - int offsLabeledAsOn = 0; - for (Conversation c : offsForHostname) { - Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c); - if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) { - offsLabeledAsOn++; - } - } - System.out.println(""); - // ================================================================================================ - - - // ------------------------------------------------------------------------------------------------------------- - // ------------------------------------------------------------------------------------------------------------- +// // ================================================================================================ +// // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>> +// // +// // Currently need to know relevant hostname in advance :( +// String hostname = "events.tplinkra.com"; +//// String hostname = "rfe-us-west-1.dch.dlink.com"; +// // Conversations with 'hostname' for ON events. +// List onsForHostname = new ArrayList<>(); +// // Conversations with 'hostname' for OFF events. +// List offsForHostname = new ArrayList<>(); +// // "Unwrap" sequence groupings in ons/offs maps. +// ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v)); +// offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v)); +// +// +// Map> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname); +// +// +// // Extract representative sequence for ON and OFF by providing the list of conversations with +// // 'hostname' observed for each event type (the training data). +// SequenceExtraction seqExtraction = new SequenceExtraction(); +//// ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname); +//// ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname); +// +// ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname); +// ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname); +// +// // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly +// // labeled). +// int onsLabeledAsOff = 0; +// Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence()); +// Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence()); +// SequenceAlignment seqAlg = seqExtraction.getAlignmentAlgorithm(); +// for (Conversation c : onsForHostname) { +// Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c); +// if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) { +// onsLabeledAsOff++; +// } +// } +// int offsLabeledAsOn = 0; +// for (Conversation c : offsForHostname) { +// Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c); +// if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) { +// offsLabeledAsOn++; +// } +// } +// System.out.println(""); +// // ================================================================================================ +// +// +// // ------------------------------------------------------------------------------------------------------------- +// // ------------------------------------------------------------------------------------------------------------- } } diff --git a/python_ml/plotting-dbscan-complete.py b/python_ml/plotting-dbscan-complete.py index 55d5af4..a2baa90 100644 --- a/python_ml/plotting-dbscan-complete.py +++ b/python_ml/plotting-dbscan-complete.py @@ -13,8 +13,8 @@ fig.set_size_inches(7, 7) # TODO: Just change the following path and filename # when needed to read from a different file path = "/scratch/July-2018/Pairs2/" -device1 = "kwikset-on" -device2 = "kwikset-off" +device1 = "alexa2-on" +device2 = "alexa2-off" filename1 = device1 + ".txt" filename2 = device2 + ".txt" @@ -38,7 +38,7 @@ X = np.array(pairsArr); # Compute DBSCAN # eps = distances # min_samples = minimum number of members of a cluster -db = DBSCAN(eps=30, min_samples=trig - 5).fit(X) +db = DBSCAN(eps=10, min_samples=trig - 5).fit(X) core_samples_mask = np.zeros_like(db.labels_, dtype=bool) core_samples_mask[db.core_sample_indices_] = True labels = db.labels_ @@ -76,7 +76,7 @@ for pair in pairsArr: else: # Only print the frequency when this is a real cluster plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + - "\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10) + "\nFreq:" + str(labels.tolist().count(labels[count])), fontsize=10) count = count + 1 #==================================================================================================== @@ -138,7 +138,7 @@ for pair in pairsArr: else: # Only print the frequency when this is a real cluster plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + - "\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10) + "\nFreq:" + str(labels.tolist().count(labels[count])), fontsize=10) count = count + 1 diff --git a/python_ml/plotting-dbscan.py b/python_ml/plotting-dbscan.py index 733fe16..bdfc1d5 100644 --- a/python_ml/plotting-dbscan.py +++ b/python_ml/plotting-dbscan.py @@ -13,7 +13,7 @@ fig.set_size_inches(7, 7) # TODO: Just change the following path and filename # when needed to read from a different file path = "/scratch/July-2018/Pairs2/" -device = "kwikset-off" +device = "alexa2-off" filename = device + ".txt" # Number of triggers @@ -35,7 +35,7 @@ X = np.array(pairsArr); # Compute DBSCAN # eps = distances # min_samples = minimum number of members of a cluster -db = DBSCAN(eps=10, min_samples=trig - 5).fit(X) +db = DBSCAN(eps=20, min_samples=trig - 5).fit(X) core_samples_mask = np.zeros_like(db.labels_, dtype=bool) core_samples_mask[db.core_sample_indices_] = True labels = db.labels_ -- 2.34.1