Adding signature combining; we can improve the performance later by requiring that...
author rtrimana <rtrimana@uci.edu>
Thu, 20 Sep 2018 23:29:08 +0000 (16:29 -0700)
committer rtrimana <rtrimana@uci.edu>
Thu, 20 Sep 2018 23:29:08 +0000 (16:29 -0700)
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/Main.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/analysis/TcpConversationUtils.java
Code/Projects/SmartPlugDetector/src/main/java/edu/uci/iotproject/util/PcapPacketUtils.java

index d909be89739e9df373b893a95b48d5907abdeb09..022052f6201509dc8551af0807691da782907d4a 100644 (file)
@@ -49,10 +49,10 @@ public class Main {
         final String offPairsPath = "/scratch/July-2018/off.txt";
 
         // 1) D-Link July 26 experiment
-//        final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
-//        final String outputPcapFile = path + "/2018-07/dlink/dlink-processed.pcap";
-//        final String triggerTimesFile = path + "/2018-07/dlink/dlink-july-26-2018.timestamps";
-//        final String deviceIp = "192.168.1.246"; // .246 == phone; .199 == dlink plug?
+        final String inputPcapFile = path + "/2018-07/dlink/dlink.wlan1.local.pcap";
+        final String outputPcapFile = path + "/2018-07/dlink/dlink-processed.pcap";
+        final String triggerTimesFile = path + "/2018-07/dlink/dlink-july-26-2018.timestamps";
+        final String deviceIp = "192.168.1.199"; // .246 == phone; .199 == dlink plug?
 
         // 2) TP-Link July 25 experiment
 //        final String inputPcapFile = path + "/2018-07/tplink/tplink.wlan1.local.pcap";
@@ -93,10 +93,10 @@ public class Main {
 //        final String deviceIp = "192.168.1.140"; // .246 == phone; .140 == TP-Link bulb
 
         // 7) Kwikset Doorlock August 6 experiment
-        final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock.wlan1.local.pcap";
-        final String outputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-processed.pcap";
-        final String triggerTimesFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-aug-6-2018.timestamps";
-        final String deviceIp = "192.168.1.246"; // .246 == phone; .142 == SmartThings Hub (note: use eth0 capture for this!)
+//        final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock.wlan1.local.pcap";
+//        final String outputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-processed.pcap";
+//        final String triggerTimesFile = path + "/2018-08/kwikset-doorlock/kwikset-doorlock-aug-6-2018.timestamps";
+//        final String deviceIp = "192.168.1.246"; // .246 == phone; .142 == SmartThings Hub (note: use eth0 capture for this!)
 
         // September 12, 2018 - includes both wlan1 and eth1 interfaces
         //final String inputPcapFile = path + "/2018-08/kwikset-doorlock/kwikset3.wlan1.local.pcap";
@@ -256,6 +256,8 @@ public class Main {
                 map(e -> e.getValue()).
                 flatMap(List::stream).
                 collect(Collectors.toList());
+        //Collections.sort(onConversations, (c1, c2) -> c1.getPackets().)
+
         List<PcapPacketPair> onPairs = onConversations.stream().
                 map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
                         TcpConversationUtils.extractPacketPairs(c)).
@@ -274,39 +276,55 @@ public class Main {
         // Perform clustering on conversation logged as part of all OFF events.
         DBSCANClusterer<PcapPacketPair> offClusterer = new DBSCANClusterer<>(10.0, 45);
         List<Cluster<PcapPacketPair>> offClusters = offClusterer.cluster(offPairs);
+        // Sort the conversations as reference
+        List<Conversation> sortedAllConversation = TcpConversationUtils.sortConversationList(allConversations);
         // Output clusters
         System.out.println("========================================");
         System.out.println("       Clustering results for ON        ");
         System.out.println("       Number of clusters: " + onClusters.size());
         int count = 0;
-        List<List<PcapPacket>> ppListOfListReadOn = null;
+        List<List<List<PcapPacket>>> ppListOfListReadOn = new ArrayList<>();
+        List<List<List<PcapPacket>>> ppListOfListListOn = new ArrayList<>();
         for (Cluster<PcapPacketPair> c : onClusters) {
             System.out.println(String.format("<<< Cluster #%02d (%03d points) >>>", ++count, c.getPoints().size()));
             System.out.print(PrintUtils.toSummaryString(c));
             if(c.getPoints().size() > 45 && c.getPoints().size() < 55) {
                 // Print to file
                 List<List<PcapPacket>> ppListOfList = PcapPacketUtils.clusterToListOfPcapPackets(c);
-                PrintUtils.serializeClustersIntoFile("./onSignature" + count + ".sig", ppListOfList);
-                ppListOfListReadOn =
-                        PrintUtils.serializeClustersFromFile("./onSignature" + count + ".sig");
+                ppListOfListListOn.add(ppListOfList);
             }
         }
+        // TODO: Merging test
+        ppListOfListListOn = PcapPacketUtils.mergeSignatures(ppListOfListListOn, sortedAllConversation);
+        count = 0;
+        for (List<List<PcapPacket>> ll : ppListOfListListOn) {
+            PrintUtils.serializeClustersIntoFile("./onSignature" + ++count + ".sig", ll);
+            ppListOfListReadOn.add(PrintUtils.serializeClustersFromFile("./onSignature" + count + ".sig"));
+        }
+
         System.out.println("========================================");
         System.out.println("       Clustering results for OFF       ");
         System.out.println("       Number of clusters: " + offClusters.size());
         count = 0;
-        List<List<PcapPacket>> ppListOfListReadOff = null;
+        List<List<List<PcapPacket>>> ppListOfListReadOff = new ArrayList<>();
+        List<List<List<PcapPacket>>> ppListOfListListOff = new ArrayList<>();
         for (Cluster<PcapPacketPair> c : offClusters) {
             System.out.println(String.format("<<< Cluster #%03d (%06d points) >>>", ++count, c.getPoints().size()));
             System.out.print(PrintUtils.toSummaryString(c));
             if(c.getPoints().size() > 45 && c.getPoints().size() < 55) {
                 // Print to file
                 List<List<PcapPacket>> ppListOfList = PcapPacketUtils.clusterToListOfPcapPackets(c);
-                PrintUtils.serializeClustersIntoFile("./offSignature" + count + ".sig", ppListOfList);
-                ppListOfListReadOff =
-                        PrintUtils.serializeClustersFromFile("./offSignature" + count + ".sig");
+                ppListOfListListOff.add(ppListOfList);
             }
         }
+        // TODO: Merging test
+        ppListOfListListOff = PcapPacketUtils.mergeSignatures(ppListOfListListOff, sortedAllConversation);
+        count = 0;
+        for (List<List<PcapPacket>> ll : ppListOfListListOff) {
+            PrintUtils.serializeClustersIntoFile("./offSignature" + ++count + ".sig", ll);
+            ppListOfListReadOff.add(PrintUtils.serializeClustersFromFile("./offSignature" + count + ".sig"));
+        }
+
         System.out.println("========================================");
         // ============================================================================================================
 
index 0db25e32737718a1d863f960040fe267ff2cd20a..bd7f9ac515718cef5585f911ab2132d755000722 100644 (file)
@@ -21,6 +21,15 @@ import static edu.uci.iotproject.util.PcapPacketUtils.*;
  */
 public class TcpConversationUtils {
 
+    /**
+     * Identifies the adjacency type of the signature for merging, i.e., where a second packet sequence sits
+     * relative to a first packet sequence within one {@code Conversation}.
+     */
+    public enum SignaturePosition {
+        /** Neither of the two adjacency conditions described for the other constants holds. */
+        NOT_ADJACENT,
+        /** The last packet of the second sequence sits directly before the first packet of the first sequence. */
+        LEFT_ADJACENT,
+        /** The first packet of the second sequence sits directly after the last packet of the first sequence. */
+        RIGHT_ADJACENT
+    }
+
     /**
      * <p>
      *      Given a {@link Conversation}, extract its set of "packet pairs", i.e., pairs of request-reply packets.
@@ -325,4 +334,117 @@ public class TcpConversationUtils {
             sb.append(" ");
         }
     }
+
+    /**
+     * Given a list of {@link Conversation} objects, sort them by timestamps.
+     * @param conversations The list of {@link Conversation} objects to be sorted.
+     * @return A sorted list of {@code Conversation} based on timestamps of the first
+     *          packet in the {@code Conversation}.
+     */
+    public static List<Conversation> sortConversationList(List<Conversation> conversations) {
+        // Get rid of Conversation objects with no packets
+        conversations.removeIf(x -> x.getPackets().size() == 0);
+        // Sort the list based on the first packet's timestamp!
+        Collections.sort(conversations, (c1, c2) ->
+                c1.getPackets().get(0).getTimestamp().compareTo(c2.getPackets().get(0).getTimestamp()));
+        return conversations;
+    }
+
+    /**
+     * Given a {@code List} of {@link Conversation} objects, find one that has the given {@code List}
+     * of {@code PcapPacket}.
+     * @param conversations The {@code List} of {@link Conversation} objects as reference.
+     * @param ppList The {@code List} of {@code PcapPacket} objects to search in the {@code List} of {@link Conversation}.
+     * @return A {@code Conversation} that contains the given {@code List} of {@code PcapPacket}.
+     */
+    public static Conversation returnConversation(List<PcapPacket> ppList, List<Conversation> conversations) {
+        // TODO: This part of comparison takes into account that the list of conversations is not sorted
+        // TODO: We could optimize this to have a better performance by requiring a sorted-by-timestamp list
+        // TODO:    as a parameter
+        // Find a Conversation that ppList is part of
+        for (Conversation c : conversations) {
+            // Figure out if c is the Conversation that ppList is in
+            if (isPartOfConversation(ppList, c)) {
+                return c;
+            }
+        }
+        // Return null if not found
+        return null;
+    }
+
+    /**
+     * Given a {@link Conversation} objects, check if {@code List} of {@code PcapPacket} is part of it and return the
+     * adjacency label based on {@code SignaturePosition}.
+     * @param conversation The {@link Conversation} object as reference.
+     * @param ppListFirst The first {@code List} of {@code PcapPacket} objects in the {@link Conversation}.
+     * @param ppListSecond The second {@code List} of {@code PcapPacket} objects in the {@link Conversation} whose
+     *                     position will be observed in the {@link Conversation} with respect to ppListFirst.
+     * @return A {@code SignaturePosition} that represents the position of the signature against another signature
+     *          in a {@link Conversation}.
+     */
+    public static SignaturePosition isPartOfConversationAndAdjacent(List<PcapPacket> ppListFirst,
+                                                                    List<PcapPacket> ppListSecond,
+                                                                    Conversation conversation) {
+        for (PcapPacket pp : conversation.getPackets()) {
+            // Take the first element in ppList and compare it
+            // The following elements in ppList are guaranteed to be in the same Conversation
+            // TODO: This part of comparison takes into account that the list of conversations is not sorted
+            // TODO: We could optimize this to have a better performance by requiring a sorted-by-timestamp list
+            // TODO:    as a parameter
+            if (isPartOfConversation(ppListSecond, conversation)) {
+                // Compare the first element of ppListSecond with the last element of ppListFirst to know
+                // whether ppListSecond is RIGHT_ADJACENT relative to ppListFirst
+                PcapPacket lastElOfFirstList = ppListFirst.get(ppListFirst.size() - 1);
+                PcapPacket firstElOfSecondList = ppListSecond.get(0);
+                // If the positions of the two are in order, then they are adjacent
+                int indexOfLastElOfFirstList = returnIndexInConversation(lastElOfFirstList, conversation);
+                int indexOfFirstElOfSecondList = returnIndexInConversation(firstElOfSecondList, conversation);
+                if(indexOfLastElOfFirstList + 1 == indexOfFirstElOfSecondList) {
+                    return SignaturePosition.RIGHT_ADJACENT;
+                }
+                // NOT RIGHT_ADJACENT, so check for LEFT_ADJACENT
+                // Compare the first element of ppListRight with the last element of ppListSecond to know
+                // whether ppListSecond is LEFT_ADJACENT relative to ppListFirst
+                PcapPacket firstElOfFirstList = ppListFirst.get(0);
+                PcapPacket lastElOfSecondList = ppListSecond.get(ppListSecond.size() - 1);
+                // If the positions of the two are in order, then they are adjacent
+                int indexOfFirstElOfFirstList = returnIndexInConversation(firstElOfFirstList, conversation);
+                int indexOfLastElOfSecondList = returnIndexInConversation(lastElOfSecondList, conversation);
+                if(indexOfLastElOfSecondList + 1 == indexOfFirstElOfFirstList) {
+                    return SignaturePosition.LEFT_ADJACENT;
+                }
+            }
+        }
+        // Return NOT_ADJACENT if not found
+        return SignaturePosition.NOT_ADJACENT;
+    }
+
+    /**
+     * Checks whether a {@code List} of {@code PcapPacket} objects is part of a {@link Conversation}.
+     * Only the first packet of the list is looked up in the {@link Conversation}.
+     *
+     * @param ppList The {@code List} of {@code PcapPacket} objects to search in the {@link Conversation}.
+     * @param conversation The {@link Conversation} object as reference.
+     * @return {@code true} if the first packet of {@code ppList} is among the packets of the
+     *         {@link Conversation}; {@code false} otherwise, including when {@code ppList} is empty.
+     */
+    private static boolean isPartOfConversation(List<PcapPacket> ppList, Conversation conversation) {
+        // An empty list has no first packet to look up, so it cannot be located in any conversation
+        if (ppList.isEmpty()) {
+            return false;
+        }
+        return conversation.getPackets().contains(ppList.get(0));
+    }
+
+    /**
+     * Finds the position of a {@code PcapPacket} among the packets of a {@link Conversation}.
+     *
+     * @param pp The {@code PcapPacket} object to search in the {@link Conversation}.
+     * @param conversation The {@link Conversation} object as reference.
+     * @return The index of the first occurrence of the {@code PcapPacket} in the {@link Conversation},
+     *         or -1 if it is not found.
+     */
+    private static int returnIndexInConversation(PcapPacket pp, Conversation conversation) {
+        // indexOf already yields -1 for a missing packet, so a separate contains() scan is unnecessary
+        return conversation.getPackets().indexOf(pp);
+    }
 }
index 932c492d75c70615cded333eff5befb7d961483c..bd1f1c83cc161ddff9fa6bdb03dbe7a3cf4bbce1 100644 (file)
@@ -1,14 +1,14 @@
 package edu.uci.iotproject.util;
 
+import edu.uci.iotproject.Conversation;
 import edu.uci.iotproject.analysis.PcapPacketPair;
+import edu.uci.iotproject.analysis.TcpConversationUtils;
 import org.apache.commons.math3.stat.clustering.Cluster;
 import org.pcap4j.core.PcapPacket;
 import org.pcap4j.packet.IpV4Packet;
 import org.pcap4j.packet.TcpPacket;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Objects;
+import java.util.*;
 
 /**
  * Utility methods for inspecting {@link PcapPacket} properties.
@@ -18,6 +18,13 @@ import java.util.Objects;
  */
 public final class PcapPacketUtils {
 
+    /**
+     * Threshold for the number of members that a set of signatures may still hold after a merge attempt.
+     * If fewer than this many members remain in the set that was merged from, {@code mergeSignatures} treats
+     * the merge as successful and gets rid of that set.
+     */
+    private static final int SIGNATURE_MERGE_THRESHOLD = 5;
+
     /**
      * Gets the source IP (in decimal format) of an IPv4 packet.
      * @param packet The packet for which the IPv4 source address is to be extracted.
@@ -102,7 +109,79 @@ public final class PcapPacketUtils {
             // Create a list of list of PcapPacket objects
             ppListOfList.add(ppList);
         }
+        // Sort the list of lists based on the first packet's timestamp!
+        Collections.sort(ppListOfList, (p1, p2) -> p1.get(0).getTimestamp().compareTo(p2.get(0).getTimestamp()));
         return ppListOfList;
     }
 
+    /**
+     * Merge signatures in {@code List} of {@code List} of {@code List} of {@code PcapPacket} objects.
+     * We cross-check these with {@code List} of {@code Conversation} objects to see
+     * if two {@code List} of {@code PcapPacket} objects actually belong to the same {@code Conversation}.
+     *
+     * @param signatures A {@link List} of {@link List} of {@link List} of
+     *          {@link PcapPacket} objects that needs to be checked and merged.
+     * @param conversations A {@link List} of {@link Conversation} objects as reference for merging.
+     * @return A {@link List} of {@link List} of {@link List} of
+     *          {@link PcapPacket} objects as the result of the merging.
+     */
+    public static List<List<List<PcapPacket>>>
+            mergeSignatures(List<List<List<PcapPacket>>> signatures, List<Conversation> conversations) {
+        // Make a copy first
+        List<List<List<PcapPacket>>> copySignatures = new ArrayList<>(signatures);
+        // Traverse and look into the pairs of signatures
+        for (int first = 0; first < signatures.size(); first++) {
+            List<List<PcapPacket>> firstList = signatures.get(first);
+            for (int second = first+1; second < signatures.size(); second++) {
+                int maxSignatureEl = 0; // Number of maximum signature elements
+                List<List<PcapPacket>> secondList = signatures.get(second);
+                int initialSecondListMembers = secondList.size();
+                // Iterate over the signatures in the first list
+                for (List<PcapPacket> signature : firstList) {
+                    signature.removeIf(el -> el == null); // Clean up null elements
+                    // Return the Conversation that the signature is part of
+                    Conversation conv = TcpConversationUtils.returnConversation(signature, conversations);
+                    // Find the element of the second list that is a match for that Conversation
+                    for (List<PcapPacket> ppList : secondList) {
+                        ppList.removeIf(el -> el == null); // Clean up null elements
+                        // Check if they are part of a Conversation and are adjacent to the first signature
+                        // If yes then merge into the first list
+                        TcpConversationUtils.SignaturePosition position =
+                                TcpConversationUtils.isPartOfConversationAndAdjacent(signature, ppList, conv);
+                        if (position == TcpConversationUtils.SignaturePosition.LEFT_ADJACENT) {
+                            // Merge to the left side of the first signature
+                            ppList.addAll(signature);
+                            signature = ppList;
+                            maxSignatureEl = signature.size() > maxSignatureEl ? signature.size() : maxSignatureEl;
+                            secondList.remove(ppList); // Remove as we merge
+                            //System.out.println("LEFT_ADJACENT!");
+                            break;
+                        } else if (position == TcpConversationUtils.SignaturePosition.RIGHT_ADJACENT) {
+                            // Merge to the right side of the first signature
+                            signature.addAll(ppList);
+                            maxSignatureEl = signature.size() > maxSignatureEl ? signature.size() : maxSignatureEl;
+                            secondList.remove(ppList); // Remove as we merge
+                            //System.out.println("RIGHT_ADJACENT!");
+                            break;
+                        } // TcpConversationUtils.SignaturePosition.NOT_ADJACENT
+                        //System.out.println("NOT_ADJACENT!");
+                    }
+                }
+                // Call it a successful merging if there are only less than 5 elements from the second list that
+                // cannot be merged
+                if (secondList.size() < SIGNATURE_MERGE_THRESHOLD) {
+                    // Prune the unsuccessfully merged signatures (i.e., these will have size() < maxSignatureEl)
+                    final int maxNumOfEl = maxSignatureEl;
+                    firstList.removeIf(el -> el.size() < maxNumOfEl);
+                    // Remove the merged set of signatures when successful
+                    signatures.remove(secondList);
+                } else if (secondList.size() < initialSecondListMembers) {
+                    // If only some of the signatures from the second list are merged, this means UNSUCCESSFUL merging
+                    // Return the original copy of the signatures object
+                    return copySignatures;
+                }
+            }
+        }
+        return signatures;
+    }
 }