Creating a proper command line and script for signature generation.
[pingpong.git] / Code / Projects / PacketLevelSignatureExtractor / src / main / java / edu / uci / iotproject / util / PcapPacketUtils.java
index 067af93edd6acf435ab154480b0a1434d2a9fa02..2f20945921b25c54f354835f1f0d28d03c7d2a35 100644 (file)
@@ -214,51 +214,51 @@ public final class PcapPacketUtils {
     }
 
     /**
-     * Merge signatures in {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects.
+     * Concatenate sequences in {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects.
      * We cross-check these with {@code List} of {@code Conversation} objects to see
      * if two {@code List} of {@code PcapPacket} objects actually belong to the same {@code Conversation}.
      * @param signatures A {@link List} of {@link List} of {@link List} of
-     *          {@link PcapPacket} objects that needs to be checked and merged.
-     * @param conversations A {@link List} of {@link Conversation} objects as reference for merging.
+     *          {@link PcapPacket} objects that needs to be checked and concatenated.
+     * @param conversations A {@link List} of {@link Conversation} objects as reference for concatenation.
      * @return A {@link List} of {@link List} of {@link List} of
-     *          {@link PcapPacket} objects as the result of the merging.
+     *          {@link PcapPacket} objects as the result of the concatenation.
      */
     public static List<List<List<PcapPacket>>>
-            mergeSignatures(List<List<List<PcapPacket>>> signatures, List<Conversation> conversations) {
+            concatSequences(List<List<List<PcapPacket>>> signatures, List<Conversation> conversations) {
 
         // TODO: THIS IS NOT A DEEP COPY; IT BASICALLY CREATES A REFERENCE TO THE SAME LIST OBJECT
         // List<List<List<PcapPacket>>> copySignatures = new ArrayList<>(signatures);
         // Make a deep copy first.
         List<List<List<PcapPacket>>> copySignatures = new ArrayList<>();
         listDeepCopy(copySignatures, signatures);
-        // Traverse and look into the pairs of signatures.
+        // Traverse and look into the pairs.
         for (int first = 0; first < signatures.size(); first++) {
             List<List<PcapPacket>> firstList = signatures.get(first);
             for (int second = first+1; second < signatures.size(); second++) {
-                int maxSignatureEl = 0; // Number of maximum signature elements.
+                int maxSignatureEl = 0;
                 List<List<PcapPacket>> secondList = signatures.get(second);
                 int initialSecondListMembers = secondList.size();
-                // Iterate over the signatures in the first list.
+                // Iterate over the sequences in the first list.
                 for (List<PcapPacket> signature : firstList) {
                     signature.removeIf(el -> el == null); // Clean up null elements.
-                    // Return the Conversation that the signature is part of.
+                    // Return the Conversation that the sequence is part of.
                     Conversation conv = TcpConversationUtils.returnConversation(signature, conversations);
                     // Find the element of the second list that is a match for that Conversation.
                     for (List<PcapPacket> ppList : secondList) {
                         ppList.removeIf(el -> el == null); // Clean up null elements.
-                        // Check if they are part of a Conversation and are adjacent to the first signature.
+                        // Check if they are part of a Conversation and are adjacent to the first sequence.
                         // If yes then merge into the first list.
                         TcpConversationUtils.SignaturePosition position =
                                 TcpConversationUtils.isPartOfConversationAndAdjacent(signature, ppList, conv);
                         if (position == TcpConversationUtils.SignaturePosition.LEFT_ADJACENT) {
-                            // Merge to the left side of the first signature.
+                            // Merge to the left side of the first sequence.
                             ppList.addAll(signature);
                             signature = ppList;
                             maxSignatureEl = signature.size() > maxSignatureEl ? signature.size() : maxSignatureEl;
                             secondList.remove(ppList); // Remove as we merge.
                             break;
                         } else if (position == TcpConversationUtils.SignaturePosition.RIGHT_ADJACENT) {
-                            // Merge to the right side of the first signature.
+                            // Merge to the right side of the first sequence.
                             signature.addAll(ppList);
                             maxSignatureEl = signature.size() > maxSignatureEl ? signature.size() : maxSignatureEl;
                             secondList.remove(ppList); // Remove as we merge.
@@ -269,16 +269,16 @@ public final class PcapPacketUtils {
                 // Call it a successful merging if there are only less than 5 elements from the second list that
                 // cannot be merged.
                 if (secondList.size() < SIGNATURE_MERGE_THRESHOLD) {
-                    // Prune the unsuccessfully merged signatures (i.e., these will have size() < maxSignatureEl).
+                    // Prune the unsuccessfully merged sequences (i.e., these will have size() < maxSignatureEl).
                     final int maxNumOfEl = maxSignatureEl;
                     // TODO: DOUBLE CHECK IF WE REALLY NEED TO PRUNE FAILED BINDINGS
                     // TODO: SOMETIMES THE SEQUENCES ARE JUST INCOMPLETE
                     // TODO: AND BOTH THE COMPLETE AND INCOMPLETE SEQUENCES ARE VALID SIGNATURES!
                     firstList.removeIf(el -> el.size() < maxNumOfEl);
-                    // Remove the merged set of signatures when successful.
+                    // Remove the merged set of sequences when successful.
                     signatures.remove(secondList);
                 } else if (secondList.size() < initialSecondListMembers) {
-                    // If only some of the signatures from the second list are merged, this means UNSUCCESSFUL merging.
+                    // If only some of the sequences from the second list are merged, this means UNSUCCESSFUL merging.
                     // Return the original copy of the signatures object.
                     return copySignatures;
                 }
@@ -310,9 +310,9 @@ public final class PcapPacketUtils {
     }
 
     /**
-     * Sort the signatures in the {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects.
-     * The purpose of this is to sort the order of signatures in the signature list. For detection purposes, we need
-     * to know if one signature occurs earlier/later in time with respect to the other signatures for more confidence
+     * Sort the sequences in the {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects.
+     * The purpose of this is to sort the order of sequences in the sequence list. For detection purposes, we need
+     * to know if one sequence occurs earlier/later in time with respect to the other sequences for more confidence
      * in detecting the occurrence of an event.
      * @param signatures A {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects that needs sorting.
      *                   We assume that innermost {@code List} of {@code PcapPacket} objects have been sorted ascending
@@ -320,7 +320,7 @@ public final class PcapPacketUtils {
      *                   {@code clusterToListOfPcapPackets} method.
      * @return A sorted {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects.
      */
-    public static List<List<List<PcapPacket>>> sortSignatures(List<List<List<PcapPacket>>> signatures) {
+    public static List<List<List<PcapPacket>>> sortSequences(List<List<List<PcapPacket>>> signatures) {
         // TODO: This is the simplest solution!!! Might not cover all corner cases.
         // TODO: Sort the list of lists based on the first packet's timestamps!
 //        Collections.sort(signatures, (p1, p2) -> {
@@ -346,16 +346,6 @@ public final class PcapPacketUtils {
                 if (Math.abs(timestamp1 - timestamp2) < TriggerTrafficExtractor.INCLUSION_WINDOW_MILLIS) {
                     // If these two are within INCLUSION_WINDOW_MILLIS window then compare!
                     compare = p1.get(count1).get(0).getTimestamp().compareTo(p2.get(count2).get(0).getTimestamp());
-//                    if (comparePrev != 0) { // First time since it is 0
-//                        if (Integer.signum(compare) != Integer.signum(comparePrev)) {
-//                            // Throw an exception if the order of the two signatures is not consistent,
-//                            // E.g., 111, 222, 333 in one occassion and 222, 333, 111 in the other.
-//                            throw new Error("OVERLAP WARNING: " + "" +
-//                                    "Please remove one of the sequences: " +
-//                                    p1.get(0).get(0).length() + "... OR " +
-//                                    p2.get(0).get(0).length() + "...");
-//                        }
-//                    }
                     overlapChecking(compare, comparePrev, p1.get(count1), p2.get(count2));
                     comparePrev = compare;
                     count1++;
@@ -468,6 +458,318 @@ public final class PcapPacketUtils {
         }
     }
 
+    /**
+     * Extract core point range in the form of {@code List} of {@code List} of {@code PcapPacket} objects.
+     *
+     * @param pairs The pairs for core points extraction.
+     * @param eps Epsilon value for the DBSCAN algorithm.
+     * @param minPts minPts value for the DBSCAN algorithm.
+     * @return A {@link List} of {@link List} of {@code PcapPacket} objects that contains core points range
+     *          in the first and second element.
+     */
+    public static List<List<PcapPacket>> extractRangeCorePoints(List<List<PcapPacket>> pairs, double eps, int minPts) {
+
+        // Initialize min and max value
+        PcapPacket minFirstElement = null;
+        PcapPacket maxFirstElement = null;
+        PcapPacket minSecondElement = null;
+        PcapPacket maxSecondElement = null;
+
+        // Iterate over pairs
+        for(List<PcapPacket> pair : pairs) {
+            if (isCorePoint(pair, pairs, eps, minPts)) {
+                // Record the first element
+                if (minFirstElement == null || pair.get(0).length() < minFirstElement.length()) {
+                    minFirstElement = pair.get(0);
+                }
+                if (maxFirstElement == null || pair.get(0).length() > maxFirstElement.length()) {
+                    maxFirstElement = pair.get(0);
+                }
+                // Record the second element
+                if (minSecondElement == null || pair.get(1).length() < minSecondElement.length()) {
+                    minSecondElement = pair.get(1);
+                }
+                if (maxSecondElement == null || pair.get(1).length() > maxSecondElement.length()) {
+                    maxSecondElement = pair.get(1);
+                }
+            }
+        }
+        List<PcapPacket> corePointLowerBound = new ArrayList<>();
+        corePointLowerBound.add(0, minFirstElement);
+        corePointLowerBound.add(1, minSecondElement);
+        List<PcapPacket> corePointUpperBound = new ArrayList<>();
+        corePointUpperBound.add(0, maxFirstElement);
+        corePointUpperBound.add(1, maxSecondElement);
+        // Combine lower and upper bounds
+        List<List<PcapPacket>> listRangeCorePoints = new ArrayList<>();
+        listRangeCorePoints.add(corePointLowerBound);
+        listRangeCorePoints.add(corePointUpperBound);
+
+        return listRangeCorePoints;
+    }
+
+    /**
+     * Test whether the {@code List} of {@code PcapPacket} objects is a core point.
+     *
+     * @param pair The pair to be tested.
+     * @param pairs All of the pairs.
+     * @param eps Epsilon value for the DBSCAN algorithm.
+     * @param minPts minPts value for the DBSCAN algorithm.
+     * @return True if the pair is a core point.
+     */
+    private static boolean isCorePoint(List<PcapPacket> pair, List<List<PcapPacket>> pairs, double eps, int minPts) {
+
+        int corePoints = 0;
+        int x1 = pair.get(0) == null ? 0 : pair.get(0).length();
+        int y1 = pair.get(1) == null ? 0 : pair.get(1).length();
+        // Check if we have enough core points
+        for(List<PcapPacket> pairInPairs : pairs) {
+            int x2 = pairInPairs.get(0) == null ? 0 : pairInPairs.get(0).length();
+            int y2 = pairInPairs.get(1) == null ? 0 : pairInPairs.get(1).length();
+            // Measure distance between x and y
+            double distance = Math.sqrt(Math.pow((double)(x2 - x1), 2) + Math.pow((double)(y2 - y1), 2));
+            // Increment core points count if this point is within eps
+            if (distance <= eps) {
+                corePoints++;
+            }
+        }
+        // Return true if the number of core points >= minPts
+        if (corePoints >= minPts) {
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Test the conservativeness of the signatures (basically whether we want strict or range-based matching).
+     * We go for a conservative approach (strict matching) when there is no range or there are ranges but the
+     * ranges overlap across multiple signatures, e.g., ON and OFF signatures.
+     *
+     * @param signature The signature we want to check and overwrite if needed.
+     * @param eps Epsilon value for the DBSCAN algorithm.
+     * @param otherSignatures Other signatures we want to check against this signature.
+     * @return A boolean that is True when range-based matching is used.
+     */
+    public static boolean isRangeBasedMatching(List<List<List<PcapPacket>>> signature, double eps,
+                                                List<List<List<PcapPacket>>> ...otherSignatures) {
+        // Check against multiple signatures
+        // TODO: Per March 2019 we only support ON and OFF signatures though
+        for(List<List<List<PcapPacket>>> otherSig : otherSignatures) {
+            // Do conservative strict matching if there is any overlap
+            if (isConservativeChecking(signature, otherSig, eps)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Test the conservativeness of the signatures (basically whether we want strict or range-based matching).
+     * We go for a conservative approach (strict matching) when there is no range or there are ranges but the
+     * ranges overlap across multiple signatures, e.g., ON and OFF signatures.
+     *
+     * @param signature The signature we want to check and overwrite if needed.
+     * @param corePointRange The core points range of this signature.
+     * @return A boolean that is True when range-based matching is used.
+     */
+    public static List<List<List<PcapPacket>>> useRangeBasedMatching(List<List<List<PcapPacket>>> signature,
+                                                                     List<List<List<PcapPacket>>> corePointRange) {
+        // Do range-based checking instead if there is no overlap
+        // Transform our signature into a range-based format
+        List<List<List<PcapPacket>>> rangeBasedSignature = getSequenceRanges(signature);
+        // We have to iterate sequence by sequence in the signature that has already gone through concatenation/merging
+        // And compare the packet lengths against the ones in corePointRange that are still in pairs/points
+        List<List<List<PcapPacket>>> finalSignature = new ArrayList<>();
+
+        // Construct the range-based signature
+        for(List<List<PcapPacket>> listOfSequences : rangeBasedSignature) {
+            List<PcapPacket> sequenceLowerBound = listOfSequences.get(0);
+            List<PcapPacket> sequenceUpperBound = listOfSequences.get(1);
+            List<List<PcapPacket>> newList = new ArrayList<>();
+            List<PcapPacket> newListLowerBound = new ArrayList<>();
+            List<PcapPacket> newListUpperBound = new ArrayList<>();
+            // Iterate over the packets
+            for(PcapPacket lowerBound : sequenceLowerBound) {
+                // Look for the lower and upper bounds from the signature
+                PcapPacket upperBound = sequenceUpperBound.get(sequenceLowerBound.indexOf(lowerBound));
+                // Look for the lower and upper bounds from the cluster analysis (core point range)
+                List<PcapPacket> bounds = getCorePointBounds(corePointRange, lowerBound, upperBound);
+                // Add into list
+                // The first element is the lower bound and the second element is the upper bound
+                newListLowerBound.add(bounds.get(0));
+                newListUpperBound.add(bounds.get(1));
+            }
+            newList.add(0, newListLowerBound);
+            newList.add(1, newListUpperBound);
+            finalSignature.add(newList);
+        }
+
+        return finalSignature;
+    }
+
+    /*
+     * Get the corresponding PcapPacket object for lower and upper bounds.
+     */
+    private static List<PcapPacket> getCorePointBounds(List<List<List<PcapPacket>>> corePointRange,
+                                                       PcapPacket lowerBound, PcapPacket upperBound) {
+
+        List<PcapPacket> listBounds = new ArrayList<>();
+        // Iterate over PcapPacket one by one
+        for(List<List<PcapPacket>> listOfListPcapPacket : corePointRange) {
+            List<PcapPacket> listCorePointLowerBound = listOfListPcapPacket.get(0);
+            List<PcapPacket> listCorePointUpperBound = listOfListPcapPacket.get(1);
+            for(PcapPacket corePointLowerBound : listCorePointLowerBound) {
+                PcapPacket corePointUpperBound =
+                        listCorePointUpperBound.get(listCorePointLowerBound.indexOf(corePointLowerBound));
+                // Return if the match for the core point bounds is found
+                // Basically the core point range has to be within the signature range
+                if (lowerBound.length() <= corePointLowerBound.length() &&
+                    corePointUpperBound.length() <= upperBound.length()) {
+                    listBounds.add(0, corePointLowerBound);
+                    listBounds.add(1, corePointUpperBound);
+                    return listBounds;
+                }
+                // Just skip the null elements
+                if (lowerBound == null && upperBound == null) {
+                    continue;
+                }
+            }
+        }
+        // Return null if not found
+        return null;
+    }
+
+    /**
+     * Check if there is any overlap between the signature stored in this class and another signature.
+     * Conditions:
+     * 1) If both signatures do not have any range, then we need to do conservative checking (return true).
+     * 2) If both signatures have the same number of packets/packet lengths, then we check the range; if the
+     *    numbers of packets/packet lengths are different then we assume that there is no overlap.
+     * 3) If there is any range in the signatures, then we need to check for overlap.
+     * 4) If there is overlap for EVERY packet/packet length, then we return true (conservative checking);
+     *    otherwise false (range-based checking).
+     *
+     * @param signature A {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects to be checked
+     *                  for overlaps with the other signature.
+     * @param otherSignature A {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects to be checked
+     *                       for overlaps with the signature.
+     * @param eps Epsilon value for the DBSCAN algorithm.
+     * @return A boolean that is true if there is an overlap; false otherwise.
+     */
+    public static boolean isConservativeChecking(List<List<List<PcapPacket>>> signature,
+                                                 List<List<List<PcapPacket>>> otherSignature,
+                                                 double eps) {
+
+        // Get the ranges of the two signatures
+        List<List<List<PcapPacket>>> signatureRanges = getSequenceRanges(signature);
+        List<List<List<PcapPacket>>> otherSignatureRanges = getSequenceRanges(otherSignature);
+        if (signature.size() == 1 && signature.get(0).get(0).size() == 2) {
+            // The signature only has 2 packets
+            return true;
+        } else if (!isRangeBased(signatureRanges) && !isRangeBased(otherSignatureRanges)) {
+            // Conservative checking when there is no range
+            return true;
+        } else if(signatureRanges.size() != otherSignatureRanges.size()) {
+            // The two signatures have different numbers of packets/packet lengths
+            return false;
+        } else {
+            // There is range; check if there is overlap
+            return checkOverlap(signatureRanges, otherSignatureRanges, eps);
+        }
+    }
+
+    /* Find the sequence with the minimum packet lengths.
+     * The second-layer list should contain the minimum sequence for element 0 and maximum sequence for element 1.
+     */
+    private static List<List<List<PcapPacket>>> getSequenceRanges(List<List<List<PcapPacket>>> signature) {
+
+        // Start from the first index
+        List<List<List<PcapPacket>>> rangeBasedSequence = new ArrayList<>();
+        for(List<List<PcapPacket>> listListPcapPacket : signature) {
+            List<List<PcapPacket>> minMaxSequence = new ArrayList<>();
+            // Both searches start from index 0
+            List<PcapPacket> minSequence = new ArrayList<>(listListPcapPacket.get(0));
+            List<PcapPacket> maxSequence = new ArrayList<>(listListPcapPacket.get(0));
+            for(List<PcapPacket> listPcapPacket : listListPcapPacket) {
+                for(PcapPacket pcapPacket : listPcapPacket) {
+                    int index = listPcapPacket.indexOf(pcapPacket);
+                    // Set the new minimum if length at the index is minimum
+                    if (pcapPacket.length() < minSequence.get(index).length()) {
+                        minSequence.set(index, pcapPacket);
+                    }
+                    // Set the new maximum if length at the index is maximum
+                    if (pcapPacket.length() > maxSequence.get(index).length()) {
+                        maxSequence.set(index, pcapPacket);
+                    }
+                }
+            }
+            // minSequence as element 0 and maxSequence as element 1
+            minMaxSequence.add(minSequence);
+            minMaxSequence.add(maxSequence);
+            rangeBasedSequence.add(minMaxSequence);
+        }
+
+        return rangeBasedSequence;
+    }
+
+    /*
+     * Check for overlap since we have range in at least one of the signatures.
+     * Overlap is only true when all ranges overlap. We need to check in order.
+     */
+    private static boolean checkOverlap(List<List<List<PcapPacket>>> signatureRanges,
+                                 List<List<List<PcapPacket>>> otherSignatureRanges, double eps) {
+
+        for(List<List<PcapPacket>> listListPcapPacket : signatureRanges) {
+            // Lower bound of the range is in index 0
+            // Upper bound of the range is in index 1
+            int sequenceSetIndex = signatureRanges.indexOf(listListPcapPacket);
+            List<PcapPacket> minSequenceSignature = listListPcapPacket.get(0);
+            List<PcapPacket> maxSequenceSignature = listListPcapPacket.get(1);
+            for(PcapPacket pcapPacket : minSequenceSignature) {
+                // Get the lower and upper bounds of the current signature
+                int packetIndex = minSequenceSignature.indexOf(pcapPacket);
+                int lowerBound = pcapPacket.length();
+                int upperBound = maxSequenceSignature.get(packetIndex).length();
+                // Check for range overlap in the other signature!
+                // Check the packet/packet length at the same position
+                List<PcapPacket> minSequenceSignatureOther = otherSignatureRanges.get(sequenceSetIndex).get(0);
+                List<PcapPacket> maxSequenceSignatureOther = otherSignatureRanges.get(sequenceSetIndex).get(1);
+                int lowerBoundOther = minSequenceSignatureOther.get(packetIndex).length();
+                int upperBoundOther = maxSequenceSignatureOther.get(packetIndex).length();
+                if (!(lowerBoundOther-(int)eps <= lowerBound && lowerBound <= upperBoundOther+(int)eps) &&
+                    !(lowerBoundOther-(int)eps <= upperBound && upperBound <= upperBoundOther+(int)eps)) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    /*
+     * Check and see if there is any range in the signatures
+     */
+    private static boolean isRangeBased(List<List<List<PcapPacket>>> signatureRanges) {
+
+        for(List<List<PcapPacket>> listListPcapPacket : signatureRanges) {
+            // Lower bound of the range is in index 0
+            // Upper bound of the range is in index 1
+            List<PcapPacket> minSequence = listListPcapPacket.get(0);
+            List<PcapPacket> maxSequence = listListPcapPacket.get(1);
+            for(PcapPacket pcapPacket : minSequence) {
+                int index = minSequence.indexOf(pcapPacket);
+                if (pcapPacket.length() != maxSequence.get(index).length()) {
+                    // If there is any packet length that differs in the minSequence
+                    // and maxSequence, then it is range-based
+                    return true;
+                }
+            }
+        }
+
+        return false;
+    }
+
     /**
      * Remove a sequence in a signature object.
      *