1 package edu.uci.iotproject.comparison.seqalignment;
3 import edu.uci.iotproject.Conversation;
4 import edu.uci.iotproject.analysis.TcpConversationUtils;
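10 * Selects a representative {@link Conversation} from a set of conversations (one that exhibits the most frequent packet sequence) and computes the maximum cost of aligning its packet length sequence with those of the remaining conversations.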
12 * @author Janus Varmarken
14 public class SequenceExtraction {
17 private final SequenceAlignment<Integer> mAlignmentAlg;
20 public SequenceExtraction() {
21 mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10));
/**
 * Creates a {@code SequenceExtraction} that uses the supplied alignment algorithm.
 *
 * @param alignmentAlgorithm the {@link SequenceAlignment} used to compute alignment costs; must not be
 *                           {@code null}.
 * @throws NullPointerException if {@code alignmentAlgorithm} is {@code null}.
 */
public SequenceExtraction(SequenceAlignment<Integer> alignmentAlgorithm) {
    // Fail fast: a null algorithm would otherwise only surface when extract(...) invokes it.
    mAlignmentAlg = java.util.Objects.requireNonNull(alignmentAlgorithm, "alignmentAlgorithm");
}
30 * Gets the {@link SequenceAlignment} used to perform the sequence extraction.
31 * @return the {@link SequenceAlignment} used to perform the sequence extraction.
33 public SequenceAlignment<Integer> getAlignmentAlgorithm() {
40 // * @param convsForAction A set of {@link Conversation}s known to be associated with a single type of user action.
42 // public void extract(List<Conversation> convsForAction) {
43 // int maxDifference = 0;
45 // for (int i = 0; i < convsForAction.size(); i++) {
46 // for (int j = i+1; j < convsForAction.size(); j++) {
47 // Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
48 // Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
49 // int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
50 // if (alignmentCost > maxDifference) {
51 // maxDifference = alignmentCost;
59 // public void extract(Map<String, List<Conversation>> hostnameToConvs) {
60 // int maxDifference = 0;
62 // for (int i = 0; i < convsForAction.size(); i++) {
63 // for (int j = i+1; j < convsForAction.size(); j++) {
64 // Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
65 // Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
66 // int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
67 // if (alignmentCost > maxDifference) {
68 // maxDifference = alignmentCost;
76 public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
77 // First group conversations by packet sequences.
78 // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
79 // as it can potentially result in sequences that are equal in terms of payload packets to be considered
80 // different due to differences in how they are terminated.
81 Map<String, List<Conversation>> groupedBySequence =
82 TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
83 // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
84 Conversation mostFrequentConv = null;
86 for (Map.Entry<String, List<Conversation>> seqMapEntry : groupedBySequence.entrySet()) {
87 if (seqMapEntry.getValue().size() > maxFrequency) {
88 // Found a more frequent sequence
89 maxFrequency = seqMapEntry.getValue().size();
90 // We just pick the first conversation as the representative conversation for this sequence type.
91 mostFrequentConv = seqMapEntry.getValue().get(0);
92 } else if (seqMapEntry.getValue().size() == maxFrequency) {
93 // This sequence has the same frequency as the max frequency seen so far.
94 // Break ties by choosing the longest sequence.
95 // First get an arbitrary representative of currently examined sequence; we just pick the first.
96 Conversation c = seqMapEntry.getValue().get(0);
97 mostFrequentConv = c.getPackets().size() > mostFrequentConv.getPackets().size() ? c : mostFrequentConv;
100 // Now find the maximum cost of aligning the most frequent (or, alternatively longest) conversation with the
101 // each of the rest of the conversations also associated with this action and hostname.
103 final Integer[] mostFrequentConvSeq = TcpConversationUtils.getPacketLengthSequence(mostFrequentConv);
104 for (Conversation c : convsForActionForHostname) {
105 if (c == mostFrequentConv) {
106 // Don't compute distance to self.
109 Integer[] cSeq = TcpConversationUtils.getPacketLengthSequence(c);
110 int alignmentCost = mAlignmentAlg.calculateAlignment(mostFrequentConvSeq, cSeq);
111 if (alignmentCost > maxCost) {
112 maxCost = alignmentCost;
115 return new ExtractedSequence(mostFrequentConv, maxCost);