Merge branch 'master' of https://github.uci.edu/rtrimana/smart_home_traffic
author rtrimana <rtrimana@uci.edu>
Sat, 1 Sep 2018 00:00:38 +0000 (17:00 -0700)
committer rtrimana <rtrimana@uci.edu>
Sat, 1 Sep 2018 00:00:38 +0000 (17:00 -0700)
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Conversation.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java

index 8beade2e670248f4d5a5897e52a8115dbf78ac94..28515e3ae981fffe6b9b33c4b19c0cc3d73bba78 100644 (file)
@@ -479,8 +479,10 @@ public class Conversation {
          *   i.e., when the trace does not contain the SYN/SYNACK exchange.
          * - current implementation relies on the server using the conventional TLS port number; may instead want to
          *   inspect the first 4 bytes of each potential TLS packet to see if they match the SSL record header.
+         *
+         * 08/31/18: Added unconventional TLS ports used by WeMo plugs and LiFX bulb.
          */
-        return mServerPort == 443;
+        return mServerPort == 443 || mServerPort == 8443 || mServerPort == 41143;
     }
 
     /**
index 669f7c0c03c94190e0e9624f32e2085ba999d61e..12edeb02ef334fa18792c9bef4d1b305ef5d2e88 100644 (file)
@@ -238,6 +238,7 @@ public class Main {
             });
         }
 
+
         // Print out all the pairs into a file for ON events
         File fileOnEvents = new File(onPairsPath);
         PrintWriter pwOn = null;
@@ -324,43 +325,54 @@ public class Main {
         }
         pwOff.close();
 
+
         // ================================================================================================
         // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
         //
         // Currently need to know relevant hostname in advance :(
-        String hostname = "events.tplinkra.com";
+//        String hostname = "events.tplinkra.com";
+        String hostname = "rfe-us-west-1.dch.dlink.com";
         // Conversations with 'hostname' for ON events.
-//        List<Conversation> onsForHostname = new ArrayList<>();
-//        // Conversations with 'hostname' for OFF events.
-//        List<Conversation> offsForHostname = new ArrayList<>();
-//        // "Unwrap" sequence groupings in ons/offs maps.
-//        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
-//        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
-//        // Extract representative sequence for ON and OFF by providing the list of conversations with
-//        // 'hostname' observed for each event type (the training data).
-//        SequenceExtraction seqExtraction = new SequenceExtraction();
+        List<Conversation> onsForHostname = new ArrayList<>();
+        // Conversations with 'hostname' for OFF events.
+        List<Conversation> offsForHostname = new ArrayList<>();
+        // "Unwrap" sequence groupings in ons/offs maps.
+        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
+        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
+
+
+        Map<String, List<Conversation>> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname);
+
+
+        // Extract representative sequence for ON and OFF by providing the list of conversations with
+        // 'hostname' observed for each event type (the training data).
+        SequenceExtraction seqExtraction = new SequenceExtraction();
 //        ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
 //        ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
-//        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
-//        // labeled).
-//        int onsLabeledAsOff = 0;
-//        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
-//        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
-//        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
-//        for (Conversation c : onsForHostname) {
-//            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
-//            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
-//                onsLabeledAsOff++;
-//            }
-//        }
-//        int offsLabeledAsOn = 0;
-//        for (Conversation c : offsForHostname) {
-//            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
-//            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
-//                offsLabeledAsOn++;
-//            }
-//        }
-//        System.out.println("");
+
+        ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname);
+        ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname);
+
+        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
+        // labeled).
+        int onsLabeledAsOff = 0;
+        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
+        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
+        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
+        for (Conversation c : onsForHostname) {
+            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
+                onsLabeledAsOff++;
+            }
+        }
+        int offsLabeledAsOn = 0;
+        for (Conversation c : offsForHostname) {
+            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
+                offsLabeledAsOn++;
+            }
+        }
+        System.out.println("");
         // ================================================================================================
 
 
index f43077db4f5563d8b9e8bc2555d009c467b0b647..dc38358f859556983c2e33f6124ea6e2dc814049 100644 (file)
@@ -9,6 +9,7 @@ import org.pcap4j.packet.IpV4Packet;
 import org.pcap4j.packet.TcpPacket;
 
 import java.util.*;
+import java.util.stream.Collectors;
 
 /**
  * Utility functions for analyzing and structuring (sets of) {@link Conversation}s.
@@ -242,6 +243,13 @@ public class TcpConversationUtils {
         return result;
     }
 
+    public static Map<String, List<Conversation>> groupConversationsByTlsApplicationDataPacketSequence(Collection<Conversation> conversations) {
+        return conversations.stream().collect(Collectors.groupingBy(
+                c -> c.getTlsApplicationDataPackets().stream().map(p -> Integer.toString(p.getOriginalLength())).
+                        reduce("", (s1, s2) -> s1.length() == 0 ? s2 : s1 + " " + s2))
+        );
+    }
+
     /**
      * Given a {@link Conversation}, counts the frequencies of each unique packet length seen as part of the
      * {@code Conversation}.
@@ -304,14 +312,35 @@ public class TcpConversationUtils {
      *         packet lengths in the returned array are ordered by packet timestamp.
      */
     public static Integer[] getPacketLengthSequence(Conversation c) {
-        List<PcapPacket> packets = c.getPackets();
-        Integer[] packetLengthSequence = new Integer[packets.size()];
-        for (int i = 0; i < packetLengthSequence.length; i++) {
-            packetLengthSequence[i] = packets.get(i).getOriginalLength();
+        return getPacketLengthSequence(c.getPackets());
+    }
+
+
+    /**
+     * Given a {@link Conversation}, extract its packet length sequence, but only include packet lengths of those
+     * packets that carry TLS Application Data.
+     * @param c The {@link Conversation} from which a TLS Application Data packet length sequence is to be extracted.
+     * @return An {@code Integer[]} that holds the packet lengths of all packets in {@code c} that carry TLS Application
+     *         Data. The packet lengths in the returned array are ordered by packet timestamp.
+     */
+    public static Integer[] getPacketLengthSequenceTlsAppDataOnly(Conversation c) {
+        if (!c.isTls()) {
+            throw new IllegalArgumentException("Provided " + c.getClass().getSimpleName() + " was not a TLS session");
         }
-        return packetLengthSequence;
+        return getPacketLengthSequence(c.getTlsApplicationDataPackets());
+    }
+
+    /**
+     * Given a list of packets, extract the packet lengths and wrap them in an array such that the packet lengths in the
+     * resulting array appear in the same order as their corresponding packets in the input list.
+     * @param packets The list of packets for which the packet lengths are to be extracted.
+     * @return An array containing the packet lengths in the same order as their corresponding packets in the input list.
+     */
+    private static Integer[] getPacketLengthSequence(List<PcapPacket> packets) {
+        return packets.stream().map(pkt -> pkt.getOriginalLength()).toArray(Integer[]::new);
     }
 
+
     /**
      * Appends a space to {@code sb} <em>iff</em> {@code sb} already contains some content.
      * @param sb A {@link StringBuilder} that should have a space appended <em>iff</em> it is not empty.
index 423e3c85d7741df89488ccb1066eee31975da024..297107d66c10e7001dd7be8aa61d7aead8f83aef 100644 (file)
@@ -1,6 +1,9 @@
 package edu.uci.iotproject.comparison.seqalignment;
 
 import edu.uci.iotproject.Conversation;
+import org.pcap4j.core.PcapPacket;
+
+import java.util.List;
 
 /**
  * TODO add class documentation.
@@ -15,11 +18,12 @@ public class ExtractedSequence {
 
     private final String mSequenceString;
 
-    public ExtractedSequence(Conversation sequence, int maxAlignmentCost) {
+    public ExtractedSequence(Conversation sequence, int maxAlignmentCost, boolean tlsAppDataAlignment) {
         mRepresentativeSequence = sequence;
         mMaxAlignmentCost = maxAlignmentCost;
         StringBuilder sb = new StringBuilder();
-        sequence.getPackets().forEach(p -> {
+        List<PcapPacket> pkts = tlsAppDataAlignment ? sequence.getTlsApplicationDataPackets() : sequence.getPackets();
+        pkts.forEach(p -> {
             if (sb.length() != 0) sb.append(" ");
             sb.append(p.getOriginalLength());
         });
index 8003670b591e3a82b74ecb99143f621dc9fece55..e20850187201cac6adc9b8caf2d94485db8ffa09 100644 (file)
@@ -3,8 +3,10 @@ package edu.uci.iotproject.comparison.seqalignment;
 import edu.uci.iotproject.Conversation;
 import edu.uci.iotproject.analysis.TcpConversationUtils;
 
+import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /**
  * TODO add class documentation.
@@ -72,7 +74,7 @@ public class SequenceExtraction {
 //
 //    }
 
-
+    // Building signature from entire sequence
     public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
         // First group conversations by packet sequences.
         // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
@@ -80,6 +82,7 @@ public class SequenceExtraction {
         // different due to differences in how they are terminated.
         Map<String, List<Conversation>> groupedBySequence =
                 TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
+
         // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
         Conversation mostFrequentConv = null;
         int maxFrequency = 0;
@@ -112,9 +115,39 @@ public class SequenceExtraction {
                 maxCost = alignmentCost;
             }
         }
-        return new ExtractedSequence(mostFrequentConv, maxCost);
+        return new ExtractedSequence(mostFrequentConv, maxCost, false);
     }
 
-
+    // Building signature from only TLS Application Data packets
+    public ExtractedSequence extractByTlsAppData(List<Conversation> convsForActionForHostname) {
+        // TODO: temporary hack to avoid 97-only conversations for dlink plug. We need some preprocessing/data cleaning.
+        convsForActionForHostname = convsForActionForHostname.stream().filter(c -> c.getTlsApplicationDataPackets().size() > 1).collect(Collectors.toList());
+
+        Map<String, List<Conversation>> groupedByTlsAppDataSequence =
+                TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(convsForActionForHostname);
+        // Get a Conversation representing the most frequent TLS application data sequence.
+        Conversation mostFrequentConv = groupedByTlsAppDataSequence.values().stream().max((l1, l2) -> {
+            // The frequency of a conversation with a specific packet sequence is the list size as that represents how
+            // many conversations exhibit that packet sequence.
+            // Hence, the difference between the list sizes can be used directly as the return value of the Comparator.
+            // Note: we break ties by choosing the one with the most TLS application data packets (i.e., the longest
+            // sequence) in case the frequencies are equal.
+            int diff = l1.size() - l2.size();
+            return diff != 0 ? diff : l1.get(0).getTlsApplicationDataPackets().size() - l2.get(0).getTlsApplicationDataPackets().size();
+        }).get().get(0); // Just pick the first as a representative of the most frequent sequence.
+        // Lengths of TLS Application Data packets in the most frequent (or most frequent and longest) conversation.
+        Integer[] mostFreqSeq = TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(mostFrequentConv);
+        // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with
+        // each of the rest of the conversations also associated with this action and hostname.
+        int maxCost = 0;
+        for (Conversation c : convsForActionForHostname) {
+            if (c == mostFrequentConv) continue;
+            int cost = mAlignmentAlg.calculateAlignment(mostFreqSeq, TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(c));
+            maxCost = cost > maxCost ? cost : maxCost;
+        }
+        return new ExtractedSequence(mostFrequentConv, maxCost, true);
+        // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with
+        // each of the rest of the conversations also associated with this action and hostname.
+    }
 
 }