8003670b591e3a82b74ecb99143f621dc9fece55
[pingpong.git] / Code / Projects / SmartPlugDetector / src / main / java / edu / uci / iotproject / comparison / seqalignment / SequenceExtraction.java
1 package edu.uci.iotproject.comparison.seqalignment;
2
3 import edu.uci.iotproject.Conversation;
4 import edu.uci.iotproject.analysis.TcpConversationUtils;
5
6 import java.util.List;
7 import java.util.Map;
8
9 /**
10  * TODO add class documentation.
11  *
12  * @author Janus Varmarken
13  */
14 public class SequenceExtraction {
15
16
17     private final SequenceAlignment<Integer> mAlignmentAlg;
18
19
20     public SequenceExtraction() {
21         mAlignmentAlg = new SequenceAlignment<>(new AlignmentPricer<>((i1,i2) -> Math.abs(i1-i2), i -> 10));
22     }
23
24
25     public SequenceExtraction(SequenceAlignment<Integer> alignmentAlgorithm) {
26         mAlignmentAlg = alignmentAlgorithm;
27     }
28
29     /**
30      * Gets the {@link SequenceAlignment} used to perform the sequence extraction.
31      * @return the {@link SequenceAlignment} used to perform the sequence extraction.
32      */
33     public SequenceAlignment<Integer> getAlignmentAlgorithm() {
34         return mAlignmentAlg;
35     }
36
37     // Initial
38 //    /**
39 //     *
40 //     * @param convsForAction A set of {@link Conversation}s known to be associated with a single type of user action.
41 //     */
42 //    public void extract(List<Conversation> convsForAction) {
43 //        int maxDifference = 0;
44 //
45 //        for (int i = 0; i < convsForAction.size(); i++) {
46 //            for (int j = i+1; j < convsForAction.size(); i++) {
47 //                Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
48 //                Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
49 //                int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
50 //                if (alignmentCost > maxDifference) {
51 //                    maxDifference = alignmentCost;
52 //                }
53 //            }
54 //        }
55 //
56 //    }
57
58
59 //    public void extract(Map<String, List<Conversation>> hostnameToConvs) {
60 //        int maxDifference = 0;
61 //
62 //        for (int i = 0; i < convsForAction.size(); i++) {
63 //            for (int j = i+1; j < convsForAction.size(); i++) {
64 //                Integer[] sequence1 = getPacketLengthSequence(convsForAction.get(i));
65 //                Integer[] sequence2 = getPacketLengthSequence(convsForAction.get(j));
66 //                int alignmentCost = mAlignmentAlg.calculateAlignment(sequence1, sequence2);
67 //                if (alignmentCost > maxDifference) {
68 //                    maxDifference = alignmentCost;
69 //                }
70 //            }
71 //        }
72 //
73 //    }
74
75
76     public ExtractedSequence extract(List<Conversation> convsForActionForHostname) {
77         // First group conversations by packet sequences.
78         // TODO: the introduction of SYN/SYNACK, FIN/FINACK and RST as part of the sequence ID may be undesirable here
79         // as it can potentially result in sequences that are equal in terms of payload packets to be considered
80         // different due to differences in how they are terminated.
81         Map<String, List<Conversation>> groupedBySequence =
82                 TcpConversationUtils.groupConversationsByPacketSequence(convsForActionForHostname);
83         // Then get a hold of one of the conversations that gave rise to the most frequent sequence.
84         Conversation mostFrequentConv = null;
85         int maxFrequency = 0;
86         for (Map.Entry<String, List<Conversation>> seqMapEntry : groupedBySequence.entrySet()) {
87             if (seqMapEntry.getValue().size() > maxFrequency) {
88                 // Found a more frequent sequence
89                 maxFrequency = seqMapEntry.getValue().size();
90                 // We just pick the first conversation as the representative conversation for this sequence type.
91                 mostFrequentConv = seqMapEntry.getValue().get(0);
92             } else if (seqMapEntry.getValue().size() == maxFrequency) {
93                 // This sequence has the same frequency as the max frequency seen so far.
94                 // Break ties by choosing the longest sequence.
95                 // First get an arbitrary representative of currently examined sequence; we just pick the first.
96                 Conversation c = seqMapEntry.getValue().get(0);
97                 mostFrequentConv = c.getPackets().size() > mostFrequentConv.getPackets().size() ? c : mostFrequentConv;
98             }
99         }
100         // Now find the maximum cost of aligning the most frequent (or, alternatively longest) conversation with the
101         // each of the rest of the conversations also associated with this action and hostname.
102         int maxCost = 0;
103         final Integer[] mostFrequentConvSeq = TcpConversationUtils.getPacketLengthSequence(mostFrequentConv);
104         for (Conversation c : convsForActionForHostname) {
105             if (c == mostFrequentConv) {
106                 // Don't compute distance to self.
107                 continue;
108             }
109             Integer[] cSeq = TcpConversationUtils.getPacketLengthSequence(c);
110             int alignmentCost = mAlignmentAlg.calculateAlignment(mostFrequentConvSeq, cSeq);
111             if (alignmentCost > maxCost) {
112                 maxCost = alignmentCost;
113             }
114         }
115         return new ExtractedSequence(mostFrequentConv, maxCost);
116     }
117
118
119
120 }