From c07db18e8751dbf47f32daae63bb9ae419e7b049 Mon Sep 17 00:00:00 2001 From: Janus Varmarken Date: Fri, 31 Aug 2018 15:21:27 -0700 Subject: [PATCH] Some exploratory work on extracting TLS App Data sequence --- .../main/java/edu/uci/iotproject/Main.java | 72 +++++++++++-------- .../seqalignment/ExtractedSequence.java | 8 ++- .../seqalignment/SequenceExtraction.java | 39 +++++++++- 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java index 1976870..73eac01 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java @@ -238,6 +238,7 @@ public class Main { }); } + // Print out all the pairs into a file for ON events File fileOnEvents = new File(onPairsPath); PrintWriter pwOn = null; @@ -324,43 +325,54 @@ public class Main { } pwOff.close(); + // ================================================================================================ // <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>> // // Currently need to know relevant hostname in advance :( - String hostname = "events.tplinkra.com"; +// String hostname = "events.tplinkra.com"; + String hostname = "rfe-us-west-1.dch.dlink.com"; // Conversations with 'hostname' for ON events. -// List onsForHostname = new ArrayList<>(); -// // Conversations with 'hostname' for OFF events. -// List offsForHostname = new ArrayList<>(); -// // "Unwrap" sequence groupings in ons/offs maps. -// ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v)); -// offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v)); -// // Extract representative sequence for ON and OFF by providing the list of conversations with -// // 'hostname' observed for each event type (the training data). 
-// SequenceExtraction seqExtraction = new SequenceExtraction(); + List onsForHostname = new ArrayList<>(); + // Conversations with 'hostname' for OFF events. + List offsForHostname = new ArrayList<>(); + // "Unwrap" sequence groupings in ons/offs maps. + ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v)); + offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v)); + + + Map> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname); + + + // Extract representative sequence for ON and OFF by providing the list of conversations with + // 'hostname' observed for each event type (the training data). + SequenceExtraction seqExtraction = new SequenceExtraction(); // ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname); // ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname); -// // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly -// // labeled). 
-// int onsLabeledAsOff = 0; -// Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence()); -// Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence()); -// SequenceAlignment seqAlg = seqExtraction.getAlignmentAlgorithm(); -// for (Conversation c : onsForHostname) { -// Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c); -// if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) { -// onsLabeledAsOff++; -// } -// } -// int offsLabeledAsOn = 0; -// for (Conversation c : offsForHostname) { -// Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c); -// if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) { -// offsLabeledAsOn++; -// } -// } -// System.out.println(""); + + ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname); + ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname); + + // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly + // labeled). 
+ int onsLabeledAsOff = 0; + Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence()); + Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence()); + SequenceAlignment seqAlg = seqExtraction.getAlignmentAlgorithm(); + for (Conversation c : onsForHostname) { + Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c); + if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) { + onsLabeledAsOff++; + } + } + int offsLabeledAsOn = 0; + for (Conversation c : offsForHostname) { + Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c); + if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) { + offsLabeledAsOn++; + } + } + System.out.println(""); // ================================================================================================ diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java index 423e3c8..297107d 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/ExtractedSequence.java @@ -1,6 +1,9 @@ package edu.uci.iotproject.comparison.seqalignment; import edu.uci.iotproject.Conversation; +import org.pcap4j.core.PcapPacket; + +import java.util.List; /** * TODO add class documentation. 
@@ -15,11 +18,12 @@ public class ExtractedSequence { private final String mSequenceString; - public ExtractedSequence(Conversation sequence, int maxAlignmentCost) { + public ExtractedSequence(Conversation sequence, int maxAlignmentCost, boolean tlsAppDataAlignment) { mRepresentativeSequence = sequence; mMaxAlignmentCost = maxAlignmentCost; StringBuilder sb = new StringBuilder(); - sequence.getPackets().forEach(p -> { + List pkts = tlsAppDataAlignment ? sequence.getTlsApplicationDataPackets() : sequence.getPackets(); + pkts.forEach(p -> { if (sb.length() != 0) sb.append(" "); sb.append(p.getOriginalLength()); }); diff --git a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java index 8003670..e208501 100644 --- a/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java +++ b/Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/comparison/seqalignment/SequenceExtraction.java @@ -3,8 +3,10 @@ package edu.uci.iotproject.comparison.seqalignment; import edu.uci.iotproject.Conversation; import edu.uci.iotproject.analysis.TcpConversationUtils; +import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * TODO add class documentation. @@ -72,7 +74,7 @@ public class SequenceExtraction { // // } - + // Building signature from entire sequence public ExtractedSequence extract(List convsForActionForHostname) { // First group conversations by packet sequences. // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here @@ -80,6 +82,7 @@ public class SequenceExtraction { // different due to differences in how they are terminated. 
Map> groupedBySequence = TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname); + // Then get a hold of one of the conversations that gave rise to the most frequent sequence. Conversation mostFrequentConv = null; int maxFrequency = 0; @@ -112,9 +115,39 @@ public class SequenceExtraction { maxCost = alignmentCost; } } - return new ExtractedSequence(mostFrequentConv, maxCost); + return new ExtractedSequence(mostFrequentConv, maxCost, false); } - + // Building signature from only TLS Application Data packets + public ExtractedSequence extractByTlsAppData(List convsForActionForHostname) { + // TODO: temporary hack to avoid 97-only conversations for dlink plug. We need some preprocessing/data cleaning. + convsForActionForHostname = convsForActionForHostname.stream().filter(c -> c.getTlsApplicationDataPackets().size() > 1).collect(Collectors.toList()); + + Map> groupedByTlsAppDataSequence = + TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(convsForActionForHostname); + // Get a Conversation representing the most frequent TLS application data sequence. + Conversation mostFrequentConv = groupedByTlsAppDataSequence.values().stream().max((l1, l2) -> { + // The frequency of a conversation with a specific packet sequence is the list size as that represents how + // many conversations exhibit that packet sequence. + // Hence, the difference between the list sizes can be used directly as the return value of the Comparator. + // Note: we break ties by choosing the one with the most TLS application data packets (i.e., the longest + // sequence) in case the frequencies are equal. + int diff = l1.size() - l2.size(); + return diff != 0 ? diff : l1.get(0).getTlsApplicationDataPackets().size() - l2.get(0).getTlsApplicationDataPackets().size(); + }).get().get(0); // Just pick the first as a representative of the most frequent sequence. 
+ // Lengths of TLS Application Data packets in the most frequent (or most frequent and longest) conversation. + Integer[] mostFreqSeq = TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(mostFrequentConv); + // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with + // each of the rest of the conversations also associated with this action and hostname. + int maxCost = 0; + for (Conversation c : convsForActionForHostname) { + if (c == mostFrequentConv) continue; + int cost = mAlignmentAlg.calculateAlignment(mostFreqSeq, TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(c)); + maxCost = cost > maxCost ? cost : maxCost; + } + return new ExtractedSequence(mostFrequentConv, maxCost, true); + // Now find the maximum cost of aligning the most frequent (or, alternatively, longest) conversation with + // each of the rest of the conversations also associated with this action and hostname. + } } -- 2.34.1