Some exploratory work on extracting TLS App Data sequence
author Janus Varmarken <varmarken@gmail.com>
Fri, 31 Aug 2018 22:21:27 +0000 (15:21 -0700)
committer Janus Varmarken <varmarken@gmail.com>
Fri, 31 Aug 2018 22:31:20 +0000 (15:31 -0700)
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java

index 197687025caed31a984287cc05615d038a128cc8..73eac010a8bb740d673b9ada018080bf2188d701 100644 (file)
@@ -238,6 +238,7 @@ public class Main {
             });
         }
 
             });
         }
 
+
         // Print out all the pairs into a file for ON events
         File fileOnEvents = new File(onPairsPath);
         PrintWriter pwOn = null;
         // Print out all the pairs into a file for ON events
         File fileOnEvents = new File(onPairsPath);
         PrintWriter pwOn = null;
@@ -324,43 +325,54 @@ public class Main {
         }
         pwOff.close();
 
         }
         pwOff.close();
 
+
         // ================================================================================================
         // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
         //
         // Currently need to know relevant hostname in advance :(
         // ================================================================================================
         // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
         //
         // Currently need to know relevant hostname in advance :(
-        String hostname = "events.tplinkra.com";
+//        String hostname = "events.tplinkra.com";
+        String hostname = "rfe-us-west-1.dch.dlink.com";
         // Conversations with 'hostname' for ON events.
         // Conversations with 'hostname' for ON events.
-//        List<Conversation> onsForHostname = new ArrayList<>();
-//        // Conversations with 'hostname' for OFF events.
-//        List<Conversation> offsForHostname = new ArrayList<>();
-//        // "Unwrap" sequence groupings in ons/offs maps.
-//        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
-//        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
-//        // Extract representative sequence for ON and OFF by providing the list of conversations with
-//        // 'hostname' observed for each event type (the training data).
-//        SequenceExtraction seqExtraction = new SequenceExtraction();
+        List<Conversation> onsForHostname = new ArrayList<>();
+        // Conversations with 'hostname' for OFF events.
+        List<Conversation> offsForHostname = new ArrayList<>();
+        // "Unwrap" sequence groupings in ons/offs maps.
+        ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
+        offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
+
+
+        Map<String, List<Conversation>> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname);
+
+
+        // Extract representative sequence for ON and OFF by providing the list of conversations with
+        // 'hostname' observed for each event type (the training data).
+        SequenceExtraction seqExtraction = new SequenceExtraction();
 //        ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
 //        ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
 //        ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
 //        ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
-//        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
-//        // labeled).
-//        int onsLabeledAsOff = 0;
-//        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
-//        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
-//        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
-//        for (Conversation c : onsForHostname) {
-//            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
-//            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
-//                onsLabeledAsOff++;
-//            }
-//        }
-//        int offsLabeledAsOn = 0;
-//        for (Conversation c : offsForHostname) {
-//            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
-//            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
-//                offsLabeledAsOn++;
-//            }
-//        }
-//        System.out.println("");
+
+        ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname);
+        ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname);
+
+        // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
+        // labeled).
+        int onsLabeledAsOff = 0;
+        Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
+        Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
+        SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
+        for (Conversation c : onsForHostname) {
+            Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
+                onsLabeledAsOff++;
+            }
+        }
+        int offsLabeledAsOn = 0;
+        for (Conversation c : offsForHostname) {
+            Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
+            if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
+                offsLabeledAsOn++;
+            }
+        }
+        System.out.println("");
         // ================================================================================================
 
 
         // ================================================================================================
 
 
index 423e3c85d7741df89488ccb1066eee31975da024..297107d66c10e7001dd7be8aa61d7aead8f83aef 100644 (file)
@@ -1,6 +1,9 @@
 package edu.uci.iotproject.comparison.seqalignment;
 
 import edu.uci.iotproject.Conversation;
 package edu.uci.iotproject.comparison.seqalignment;
 
 import edu.uci.iotproject.Conversation;
+import org.pcap4j.core.PcapPacket;
+
+import java.util.List;
 
 /**
  * TODO add class documentation.
 
 /**
  * TODO add class documentation.
@@ -15,11 +18,12 @@ public class ExtractedSequence {
 
     private final String mSequenceString;
 
 
     private final String mSequenceString;
 
-    public ExtractedSequence(Conversation sequence, int maxAlignmentCost) {
+    public ExtractedSequence(Conversation sequence, int maxAlignmentCost, boolean tlsAppDataAlignment) {
         mRepresentativeSequence = sequence;
         mMaxAlignmentCost = maxAlignmentCost;
         StringBuilder sb = new StringBuilder();
         mRepresentativeSequence = sequence;
         mMaxAlignmentCost = maxAlignmentCost;
         StringBuilder sb = new StringBuilder();
-        sequence.getPackets().forEach(p -> {
+        List<PcapPacket> pkts = tlsAppDataAlignment ? sequence.getTlsApplicationDataPackets() : sequence.getPackets();
+        pkts.forEach(p -> {
             if (sb.length() != 0) sb.append(" ");
             sb.append(p.getOriginalLength());
         });
             if (sb.length() != 0) sb.append(" ");
             sb.append(p.getOriginalLength());
         });
index 8003670b591e3a82b74ecb99143f621dc9fece55..e20850187201cac6adc9b8caf2d94485db8ffa09 100644 (file)
@@ -3,8 +3,10 @@ package edu.uci.iotproject.comparison.seqalignment;
 import edu.uci.iotproject.Conversation;
 import edu.uci.iotproject.analysis.TcpConversationUtils;
 
 import edu.uci.iotproject.Conversation;
 import edu.uci.iotproject.analysis.TcpConversationUtils;
 
+import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /**
  * TODO add class documentation.
 
 /**
  * TODO add class documentation.
@@ -72,7 +74,7 @@ public class SequenceExtraction {
 //
 //    }
 
 //
 //    }
 
-
+    // Building signature from entire sequence
     public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
         // First group conversations by packet sequences.
         // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
     public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
         // First group conversations by packet sequences.
         // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
@@ -80,6 +82,7 @@ public class SequenceExtraction {
         // different due to differences in how they are terminated.
         Map<String, List<Conversation>> groupedBySequence =
                 TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
         // different due to differences in how they are terminated.
         Map<String, List<Conversation>> groupedBySequence =
                 TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
+
         // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
         Conversation mostFrequentConv = null;
         int maxFrequency = 0;
         // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
         Conversation mostFrequentConv = null;
         int maxFrequency = 0;
@@ -112,9 +115,39 @@ public class SequenceExtraction {
                 maxCost = alignmentCost;
             }
         }
                 maxCost = alignmentCost;
             }
         }
-        return new ExtractedSequence(mostFrequentConv, maxCost);
+        return new ExtractedSequence(mostFrequentConv, maxCost, false);
     }
 
     }
 
-
+    /**
+     * Builds a signature from only the TLS Application Data packets of the given conversations.
+     *
+     * <p>Conversations are grouped by their TLS Application Data packet sequence, and a conversation from the
+     * largest group is chosen as the representative. Ties between equally large groups are broken in favor of the
+     * group whose first conversation has the most TLS Application Data packets. The maximum alignment cost is the
+     * largest cost of aligning the representative with any other (retained) conversation.</p>
+     *
+     * @param convsForActionForHostname the conversations observed for one action (event type) and hostname.
+     *                                  The list itself is not modified.
+     * @return the representative conversation together with the maximum alignment cost.
+     * @throws java.util.NoSuchElementException if no group of conversations remains to pick a representative from,
+     *         e.g., when no conversation has more than one TLS Application Data packet.
+     */
+    public ExtractedSequence extractByTlsAppData(List<Conversation> convsForActionForHostname) {
+        // TODO: temporary hack to avoid 97-only conversations for dlink plug. We need some preprocessing/data cleaning.
+        // Only conversations with more than one TLS Application Data packet are retained.
+        List<Conversation> candidates = convsForActionForHostname.stream()
+                .filter(c -> c.getTlsApplicationDataPackets().size() > 1)
+                .collect(Collectors.toList());
+
+        Map<String, List<Conversation>> groupedByTlsAppDataSequence =
+                TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(candidates);
+        // The frequency of a specific packet sequence is the size of its list, as that is how many conversations
+        // exhibit that sequence. Ties are broken by choosing the group with the most TLS Application Data packets
+        // (i.e., the longest sequence).
+        Comparator<List<Conversation>> byFrequencyThenLength = Comparator
+                .<List<Conversation>>comparingInt(List::size)
+                .thenComparingInt(l -> l.get(0).getTlsApplicationDataPackets().size());
+        // Get a Conversation representing the most frequent TLS Application Data sequence; just pick the first
+        // conversation of the winning group as its representative.
+        Conversation mostFrequentConv = groupedByTlsAppDataSequence.values().stream()
+                .max(byFrequencyThenLength)
+                .orElseThrow(() -> new java.util.NoSuchElementException(
+                        "No conversation with more than one TLS Application Data packet to extract a sequence from"))
+                .get(0);
+        // Lengths of TLS Application Data packets in the most frequent (or most frequent and longest) conversation.
+        Integer[] mostFreqSeq = TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(mostFrequentConv);
+        // Now find the maximum cost of aligning the representative conversation with each of the other
+        // conversations also associated with this action and hostname.
+        int maxCost = 0;
+        for (Conversation c : candidates) {
+            if (c == mostFrequentConv) {
+                // Identity check: skip aligning the representative with itself.
+                continue;
+            }
+            int cost = mAlignmentAlg.calculateAlignment(mostFreqSeq, TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(c));
+            maxCost = Math.max(maxCost, cost);
+        }
+        return new ExtractedSequence(mostFrequentConv, maxCost, true);
+    }
 
 }
 
 }