Adding some changes related to the second batch of experiments to cluster pairs.
author rtrimana <rtrimana@uci.edu>
Thu, 6 Sep 2018 00:52:37 +0000 (17:52 -0700)
committer rtrimana <rtrimana@uci.edu>
Thu, 6 Sep 2018 00:52:37 +0000 (17:52 -0700)
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java
python_ml/plotting-dbscan-complete.py
python_ml/plotting-dbscan.py

index 98b4c270e94a305d9a3de81fe68055bfeca058a2..be9c34df47809cbbdc9a2367c29d2475c76150dd 100644 (file)
@@ -39,8 +39,8 @@ public class Main {
         // ------------ # Code for extracting traffic generated by a device within x seconds of a trigger # ------------
         // Paths to input and output files (consider supplying these as arguments instead) and IP of the device for
         // which traffic is to be extracted:
-//        String path = "/scratch/July-2018"; // Rahmadi
-        String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
+        String path = "/scratch/July-2018"; // Rahmadi
+//        String path = "/Users/varmarken/temp/UCI IoT Project/experiments"; // Janus
         boolean verbose = true;
         final String onPairsPath = "/scratch/July-2018/on.txt";
         final String offPairsPath = "/scratch/July-2018/off.txt";
@@ -52,10 +52,10 @@ public class Main {
 //        final String deviceIp = "192.168.1.246"; // .246 == phone; .199 == dlink plug?
 
         // 2) TP-Link July 25 experiment
-        final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.pcap";
-        final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.pcap";
-        final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.timestamps";
-        final String deviceIp = "192.168.1.159";
+//        final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.pcap";
+//        final String outputPcapFile = path + "/2018-07/tplink/tplink-processed.pcap";
+//        final String triggerTimesFile = path + "/2018-07/tplink/tplink-july-25-2018.timestamps";
+//        final String deviceIp = "192.168.1.159";
 
         // 2b) TP-Link July 25 experiment TRUNCATED:
         // Only contains "true local" events, i.e., before the behavior changes to remote-like behavior.
@@ -105,7 +105,7 @@ public class Main {
 //        final String inputPcapFile = path + "/2018-08/lifx-bulb/lifx-bulb.wlan1.local.pcap";
 //        final String outputPcapFile = path + "/2018-08/lifx-bulb/lifx-bulb-processed.pcap";
 //        final String triggerTimesFile = path + "/2018-08/lifx-bulb/lifx-bulb-aug-8-2018.timestamps";
-//        final String deviceIp = "192.168.1.231"; // .246 == phone; .231 == Lifx
+//        final String deviceIp = "192.168.1.246"; // .246 == phone; .231 == Lifx
 
         // 10) Amcrest Camera August 9 experiment
 //        final String inputPcapFile = path + "/2018-08/amcrest-camera/amcrest-camera.wlan1.local.pcap";
@@ -143,10 +143,10 @@ public class Main {
 //        final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-16-2018.timestamps";
 //        final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
         // August 17
-//        final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap";
-//        final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap";
-//        final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps";
-//        final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
+        final String inputPcapFile = path + "/2018-08/alexa/alexa2.wlan1.local.pcap";
+        final String outputPcapFile = path + "/2018-08/alexa/alexa2-processed.pcap";
+        final String triggerTimesFile = path + "/2018-08/alexa/alexa-aug-17-2018.timestamps";
+        final String deviceIp = "192.168.1.225"; // .246 == phone; .225 == Alexa
 
         TriggerTimesFileReader ttfr = new TriggerTimesFileReader();
         List<Instant> triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false);
@@ -273,7 +273,6 @@ public class Main {
                         // The length of the first packet
                         int firstLen = 0;
                         for (PcapPacket pcap : packetList) {
-                            System.out.println(pcap.length() + ", " + conv.getDirection(pcap));
                             boolean isPair = false;
                             if (count % 2 == 0) {
                                 firstDir = conv.getDirection(pcap);
@@ -282,8 +281,8 @@ public class Main {
                                 if(conv.getDirection(pcap) != firstDir) {
                                     isPair = true;
                                     pwOn.println(firstLen + ", " + pcap.length());
-                                    //System.out.println(firstDir + ", " + conv.getDirection(pcap));
-                                    //System.out.println(firstLen + ", " + pcap.length());
+                                    System.out.println(firstDir + ", " + conv.getDirection(pcap));
+                                    System.out.println(firstLen + ", " + pcap.length());
                                 }
                             }
                             count++;
@@ -351,7 +350,6 @@ public class Main {
                         // The length of the first packet
                         int firstLen = 0;
                         for (PcapPacket pcap : packetList) {
-                            System.out.println(pcap.length() + ", " + conv.getDirection(pcap));
                             boolean isPair = false;
                             if (count % 2 == 0) {
                                 firstDir = conv.getDirection(pcap);
@@ -377,58 +375,58 @@ public class Main {
         pwOff.close();
 
 
-        // ================================================================================================
-        // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
-        //
-        // Currently need to know relevant hostname in advance :(
-        String hostname = "events.tplinkra.com";
-//        String hostname = "rfe-us-west-1.dch.dlink.com";
-        // Conversations with 'hostname' for ON events.
-        List<Conversation> onsForHostname = new ArrayList<>();
-        // Conversations with 'hostname' for OFF events.
-        List<Conversation> offsForHostname = new ArrayList<>();
-        // "Unwrap" sequence groupings in ons/offs maps.
-        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
-        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
-
-
-        Map<String, List<Conversation>> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname);
-
-
-        // Extract representative sequence for ON and OFF by providing the list of conversations with
-        // 'hostname' observed for each event type (the training data).
-        SequenceExtraction seqExtraction = new SequenceExtraction();
-//        ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
-//        ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
-
-        ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname);
-        ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname);
-
-        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
-        // labeled).
-        int onsLabeledAsOff = 0;
-        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
-        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
-        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
-        for (Conversation c : onsForHostname) {
-            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
-            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
-                onsLabeledAsOff++;
-            }
-        }
-        int offsLabeledAsOn = 0;
-        for (Conversation c : offsForHostname) {
-            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
-            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
-                offsLabeledAsOn++;
-            }
-        }
-        System.out.println("");
-        // ================================================================================================
-
-
-        // -------------------------------------------------------------------------------------------------------------
-        // -------------------------------------------------------------------------------------------------------------
+//        // ================================================================================================
+//        // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
+//        //
+//        // Currently need to know relevant hostname in advance :(
+//        String hostname = "events.tplinkra.com";
+////        String hostname = "rfe-us-west-1.dch.dlink.com";
+//        // Conversations with 'hostname' for ON events.
+//        List<Conversation> onsForHostname = new ArrayList<>();
+//        // Conversations with 'hostname' for OFF events.
+//        List<Conversation> offsForHostname = new ArrayList<>();
+//        // "Unwrap" sequence groupings in ons/offs maps.
+//        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
+//        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
+//
+//
+//        Map<String, List<Conversation>> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname);
+//
+//
+//        // Extract representative sequence for ON and OFF by providing the list of conversations with
+//        // 'hostname' observed for each event type (the training data).
+//        SequenceExtraction seqExtraction = new SequenceExtraction();
+////        ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
+////        ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
+//
+//        ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname);
+//        ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname);
+//
+//        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
+//        // labeled).
+//        int onsLabeledAsOff = 0;
+//        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
+//        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
+//        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
+//        for (Conversation c : onsForHostname) {
+//            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
+//            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
+//                onsLabeledAsOff++;
+//            }
+//        }
+//        int offsLabeledAsOn = 0;
+//        for (Conversation c : offsForHostname) {
+//            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
+//            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
+//                offsLabeledAsOn++;
+//            }
+//        }
+//        System.out.println("");
+//        // ================================================================================================
+//
+//
+//        // -------------------------------------------------------------------------------------------------------------
+//        // -------------------------------------------------------------------------------------------------------------
     }
 
 }
index 55d5af4638306220119dcd8ef87f73b991023ed4..a2baa902c63104e52c02a2c6d24b07f4c5f60760 100644 (file)
@@ -13,8 +13,8 @@ fig.set_size_inches(7, 7)
 # TODO: Just change the following path and filename 
 #      when needed to read from a different file
 path = "/scratch/July-2018/Pairs2/"
-device1 = "kwikset-on"
-device2 = "kwikset-off"
+device1 = "alexa2-on"
+device2 = "alexa2-off"
 filename1 = device1 + ".txt"
 filename2 = device2 + ".txt"
 
@@ -38,7 +38,7 @@ X = np.array(pairsArr);
 # Compute DBSCAN
 # eps = distances
 # min_samples = minimum number of members of a cluster
-db = DBSCAN(eps=30, min_samples=trig - 5).fit(X)
+db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
 core_samples_mask[db.core_sample_indices_] = True
 labels = db.labels_
@@ -76,7 +76,7 @@ for pair in pairsArr:
        else:
        # Only print the frequency when this is a real cluster
                plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + 
-                       "\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10)
+                       "\nFreq:" + str(labels.tolist().count(labels[count])), fontsize=10)
        count = count + 1
 
 #====================================================================================================
@@ -138,7 +138,7 @@ for pair in pairsArr:
        else:
        # Only print the frequency when this is a real cluster
                plt.text(pair[0], pair[1], str(pair[0]) + ", " + str(pair[1]) + 
-                       "\nFreq: " + str(labels.tolist().count(labels[count])), fontsize=10)
+                       "\nFreq:" + str(labels.tolist().count(labels[count])), fontsize=10)
        count = count + 1
 
 
index 733fe16243e461afcdaf466186d8bc96e3359867..bdfc1d5302ac0c5442b7d8cd266c09ff049b3bcf 100644 (file)
@@ -13,7 +13,7 @@ fig.set_size_inches(7, 7)
 # TODO: Just change the following path and filename 
 #      when needed to read from a different file
 path = "/scratch/July-2018/Pairs2/"
-device = "kwikset-off"
+device = "alexa2-off"
 filename = device + ".txt"
 
 # Number of triggers
@@ -35,7 +35,7 @@ X = np.array(pairsArr);
 # Compute DBSCAN
 # eps = distances
 # min_samples = minimum number of members of a cluster
-db = DBSCAN(eps=10, min_samples=trig - 5).fit(X)
+db = DBSCAN(eps=20, min_samples=trig - 5).fit(X)
 core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
 core_samples_mask[db.core_sample_indices_] = True
 labels = db.labels_