});
}
+
// Print out all the pairs into a file for ON events
File fileOnEvents = new File(onPairsPath);
PrintWriter pwOn = null;
}
pwOff.close();
+
// ================================================================================================
// <<< Some work-in-progress/explorative code that extracts a "representative" sequence >>>
//
// Currently need to know relevant hostname in advance :(
- String hostname = "events.tplinkra.com";
+// String hostname = "events.tplinkra.com";
+ String hostname = "rfe-us-west-1.dch.dlink.com";
// Conversations with 'hostname' for ON events.
-// List<Conversation> onsForHostname = new ArrayList<>();
-// // Conversations with 'hostname' for OFF events.
-// List<Conversation> offsForHostname = new ArrayList<>();
-// // "Unwrap" sequence groupings in ons/offs maps.
-// ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
-// offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
-// // Extract representative sequence for ON and OFF by providing the list of conversations with
-// // 'hostname' observed for each event type (the training data).
-// SequenceExtraction seqExtraction = new SequenceExtraction();
+ List<Conversation> onsForHostname = new ArrayList<>();
+ // Conversations with 'hostname' for OFF events.
+ List<Conversation> offsForHostname = new ArrayList<>();
+ // "Unwrap" sequence groupings in ons/offs maps.
+ ons.get(hostname).forEach((k,v) -> onsForHostname.addAll(v));
+ offs.get(hostname).forEach((k,v) -> offsForHostname.addAll(v));
+
+
+ Map<String, List<Conversation>> onsForHostnameGroupedByTlsAppDataSequence = TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(onsForHostname);
+
+
+ // Extract representative sequence for ON and OFF by providing the list of conversations with
+ // 'hostname' observed for each event type (the training data).
+ SequenceExtraction seqExtraction = new SequenceExtraction();
// ExtractedSequence extractedSequenceForOn = seqExtraction.extract(onsForHostname);
// ExtractedSequence extractedSequenceForOff = seqExtraction.extract(offsForHostname);
-// // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
-// // labeled).
-// int onsLabeledAsOff = 0;
-// Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
-// Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
-// SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
-// for (Conversation c : onsForHostname) {
-// Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
-// if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
-// onsLabeledAsOff++;
-// }
-// }
-// int offsLabeledAsOn = 0;
-// for (Conversation c : offsForHostname) {
-// Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
-// if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
-// offsLabeledAsOn++;
-// }
-// }
-// System.out.println("");
+
+ ExtractedSequence extractedSequenceForOn = seqExtraction.extractByTlsAppData(onsForHostname);
+ ExtractedSequence extractedSequenceForOff = seqExtraction.extractByTlsAppData(offsForHostname);
+
+ // Let's check how many ONs align with OFFs and vice versa (that is, how many times an event is incorrectly
+ // labeled).
+ int onsLabeledAsOff = 0;
+ Integer[] representativeOnSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOn.getRepresentativeSequence());
+ Integer[] representativeOffSeq = TcpConversationUtils.getPacketLengthSequence(extractedSequenceForOff.getRepresentativeSequence());
+ SequenceAlignment<Integer> seqAlg = seqExtraction.getAlignmentAlgorithm();
+ for (Conversation c : onsForHostname) {
+ Integer[] onSeq = TcpConversationUtils.getPacketLengthSequence(c);
+ if (seqAlg.calculateAlignment(representativeOffSeq, onSeq) <= extractedSequenceForOff.getMaxAlignmentCost()) {
+ onsLabeledAsOff++;
+ }
+ }
+ int offsLabeledAsOn = 0;
+ for (Conversation c : offsForHostname) {
+ Integer[] offSeq = TcpConversationUtils.getPacketLengthSequence(c);
+ if (seqAlg.calculateAlignment(representativeOnSeq, offSeq) <= extractedSequenceForOn.getMaxAlignmentCost()) {
+ offsLabeledAsOn++;
+ }
+ }
+ System.out.println("");
// ================================================================================================
import edu.uci.iotproject.Conversation;
import edu.uci.iotproject.analysis.TcpConversationUtils;
+import java.util.Comparator;
import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
/**
* TODO add class documentation.
//
// }
-
+ // Building signature from entire sequence
public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
// First group conversations by packet sequences.
// TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
// different due to differences in how they are terminated.
Map<String, List<Conversation>> groupedBySequence =
TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
+
// Then get a hold of one of the conversations that gave rise to the most frequent sequence.
Conversation mostFrequentConv = null;
int maxFrequency = 0;
maxCost = alignmentCost;
}
}
- return new ExtractedSequence(mostFrequentConv, maxCost);
+ return new ExtractedSequence(mostFrequentConv, maxCost, false);
}
-
+ // Building signature from only TLS Application Data packets
+ public ExtractedSequence extractByTlsAppData(List<Conversation> convsForActionForHostname) {
+ // TODO: temporary hack to avoid 97-only conversations for dlink plug. We need some preprocessing/data cleaning.
+ convsForActionForHostname = convsForActionForHostname.stream().filter(c -> c.getTlsApplicationDataPackets().size() > 1).collect(Collectors.toList());
+
+ Map<String, List<Conversation>> groupedByTlsAppDataSequence =
+ TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(convsForActionForHostname);
+ // Get a Conversation representing the most frequent TLS application data sequence.
+ Conversation mostFrequentConv = groupedByTlsAppDataSequence.values().stream().max((l1, l2) -> {
+ // The frequency of a conversation with a specific packet sequence is the list size as that represents how
+ // many conversations exhibit that packet sequence.
+ // Hence, the difference between the list sizes can be used directly as the return value of the Comparator.
+ // Note: we break ties by choosing the one with the most TLS application data packets (i.e., the longest
+ // sequence) in case the frequencies are equal.
+ int diff = l1.size() - l2.size();
+ return diff != 0 ? diff : l1.get(0).getTlsApplicationDataPackets().size() - l2.get(0).getTlsApplicationDataPackets().size();
+ }).get().get(0); // Just pick the first as a representative of the most frequent sequence.
+ // Lengths of TLS Application Data packets in the most frequent (or most frequent and longest) conversation.
+ Integer[] mostFreqSeq = TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(mostFrequentConv);
+ // Now find the maximum cost of aligning the most frequent (or, alternatively longest) conversation with the
+ // each of the rest of the conversations also associated with this action and hostname.
+ int maxCost = 0;
+ for (Conversation c : convsForActionForHostname) {
+ if (c == mostFrequentConv) continue;
+ int cost = mAlignmentAlg.calculateAlignment(mostFreqSeq, TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(c));
+ maxCost = cost > maxCost ? cost : maxCost;
+ }
+ return new ExtractedSequence(mostFrequentConv, maxCost, true);
+ // Now find the maximum cost of aligning the most frequent (or, alternatively longest) conversation with the
+ // each of the rest of the conversations also associated with this action and hostname.
+ }
}