+ // Building signature from only TLS Application Data packets
+ public ExtractedSequence extractByTlsAppData(List<Conversation> convsForActionForHostname) {
+ // TODO: temporary hack to avoid 97-only conversations for dlink plug. We need some preprocessing/data cleaning.
+ convsForActionForHostname = convsForActionForHostname.stream().filter(c -> c.getTlsApplicationDataPackets().size() > 1).collect(Collectors.toList());
+
+ Map<String, List<Conversation>> groupedByTlsAppDataSequence =
+ TcpConversationUtils.groupConversationsByTlsApplicationDataPacketSequence(convsForActionForHostname);
+ // Get a Conversation representing the most frequent TLS application data sequence.
+ Conversation mostFrequentConv = groupedByTlsAppDataSequence.values().stream().max((l1, l2) -> {
+ // The frequency of a conversation with a specific packet sequence is the list size as that represents how
+ // many conversations exhibit that packet sequence.
+ // Hence, the difference between the list sizes can be used directly as the return value of the Comparator.
+ // Note: we break ties by choosing the one with the most TLS application data packets (i.e., the longest
+ // sequence) in case the frequencies are equal.
+ int diff = l1.size() - l2.size();
+ return diff != 0 ? diff : l1.get(0).getTlsApplicationDataPackets().size() - l2.get(0).getTlsApplicationDataPackets().size();
+ }).get().get(0); // Just pick the first as a representative of the most frequent sequence.
+ // Lengths of TLS Application Data packets in the most frequent (or most frequent and longest) conversation.
+ Integer[] mostFreqSeq = TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(mostFrequentConv);
+ // Now find the maximum cost of aligning the most frequent (or, alternatively longest) conversation with the
+ // each of the rest of the conversations also associated with this action and hostname.
+ int maxCost = 0;
+ for (Conversation c : convsForActionForHostname) {
+ if (c == mostFrequentConv) continue;
+ int cost = mAlignmentAlg.calculateAlignment(mostFreqSeq, TcpConversationUtils.getPacketLengthSequenceTlsAppDataOnly(c));
+ maxCost = cost > maxCost ? cost : maxCost;
+ }
+ return new ExtractedSequence(mostFrequentConv, maxCost, true);
+ // Now find the maximum cost of aligning the most frequent (or, alternatively longest) conversation with the
+ // each of the rest of the conversations also associated with this action and hostname.
+ }