+ TriggerTimesFileReader ttfr = new TriggerTimesFileReader();
+ // Parse the trigger timestamps from file (second arg 'false' — flag semantics not visible here; TODO confirm).
+ List<Instant> triggerTimes = ttfr.readTriggerTimes(triggerTimesFile, false);
+ // Tag each trigger with "ON" or "OFF", assuming that the first trigger is an "ON" and that they alternate.
+ List<UserAction> userActions = new ArrayList<>();
+ for (int i = 0; i < triggerTimes.size(); i++) {
+     userActions.add(new UserAction(i % 2 == 0 ? Type.TOGGLE_ON : Type.TOGGLE_OFF, triggerTimes.get(i)));
+ }
+ TriggerTrafficExtractor tte = new TriggerTrafficExtractor(inputPcapFile, triggerTimes, deviceIp);
+ final PcapDumper outputter = Pcaps.openDead(DataLinkType.EN10MB, 65536).dumpOpen(outputPcapFile);
+ DnsMap dnsMap = new DnsMap();
+ TcpReassembler tcpReassembler = new TcpReassembler();
+ TrafficLabeler trafficLabeler = new TrafficLabeler(userActions);
+ // Flush and close the dump file even if extraction throws; previously an exception mid-extraction
+ // leaked the dumper handle and could leave a truncated output pcap behind.
+ try {
+     tte.performExtraction(pkt -> {
+         try {
+             outputter.dump(pkt);
+         } catch (NotOpenException e) {
+             // Best-effort dump: report and keep extracting rather than aborting the whole run.
+             e.printStackTrace();
+         }
+     }, dnsMap, tcpReassembler, trafficLabeler);
+ } finally {
+     outputter.flush();
+     outputter.close();
+ }
+
+ if (tte.getPacketsIncludedCount() != trafficLabeler.getTotalPacketCount()) {
+ // Sanity/debug check
+ throw new AssertionError(String.format("mismatch between packet count in %s and %s",
+ TriggerTrafficExtractor.class.getSimpleName(), TrafficLabeler.class.getSimpleName()));
+ }
+
+ // Extract all conversations present in the filtered trace.
+ List<Conversation> allConversations = tcpReassembler.getTcpConversations();
+ // Group conversations by hostname.
+ Map<String, List<Conversation>> convsByHostname = TcpConversationUtils.groupConversationsByHostname(allConversations, dnsMap);
+ System.out.println("Grouped conversations by hostname.");
+ // Per-hostname frequencies of the individual packet lengths exchanged with that hostname.
+ final Map<String, Map<Integer, Integer>> pktLenFreqsByHostname = new HashMap<>();
+ for (Map.Entry<String, List<Conversation>> entry : convsByHostname.entrySet()) {
+     pktLenFreqsByHostname.put(entry.getKey(), TcpConversationUtils.countPacketLengthFrequencies(entry.getValue()));
+ }
+ System.out.println("Counted frequencies of packet lengths exchanged with each hostname.");
+ // Per-hostname frequencies of whole packet sequences (i.e., how many conversations exchange a
+ // sequence of packets of some specific lengths).
+ final Map<String, Map<String, Integer>> pktSeqFreqsByHostname = new HashMap<>();
+ for (Map.Entry<String, List<Conversation>> entry : convsByHostname.entrySet()) {
+     pktSeqFreqsByHostname.put(entry.getKey(), TcpConversationUtils.countPacketSequenceFrequencies(entry.getValue()));
+ }
+ System.out.println("Counted frequencies of packet sequences exchanged with each hostname.");
+ // Per-hostname frequencies of packet pairs exchanged with that hostname across all conversations.
+ final Map<String, Map<String, Integer>> pktPairFreqsByHostname =
+         TcpConversationUtils.countPacketPairFrequenciesByHostname(allConversations, dnsMap);
+ System.out.println("Counted frequencies of packet pairs per hostname");
+ // For each user action, reassemble the set of TCP connections occurring shortly after it.
+ final Map<UserAction, List<Conversation>> userActionToConversations = trafficLabeler.getLabeledReassembledTcpTraffic();
+ final Map<UserAction, Map<String, List<Conversation>>> userActionsToConvsByHostname = trafficLabeler.getLabeledReassembledTcpTraffic(dnsMap);
+ System.out.println("Reassembled TCP conversations occurring shortly after each user event");
+
+
+
+ /*
+ * NOTE: no need to generate these more complex on/off maps that also contain mappings from hostname and
+ * sequence identifiers as we do not care about hostnames and sequences during clustering.
+ * We can simply use the UserAction->List<Conversation> map to generate ON/OFF groupings of conversations.
+ */
+ /*
+ // Contains all ON events: hostname -> sequence identifier -> list of conversations with that sequence
+ Map<String, Map<String, List<Conversation>>> ons = new HashMap<>();
+ // Contains all OFF events: hostname -> sequence identifier -> list of conversations with that sequence
+ Map<String, Map<String, List<Conversation>>> offs = new HashMap<>();
+ userActionsToConvsByHostname.forEach((ua, hostnameToConvs) -> {
+ Map<String, Map<String, List<Conversation>>> outer = ua.getType() == Type.TOGGLE_ON ? ons : offs;
+ hostnameToConvs.forEach((host, convs) -> {
+ Map<String, List<Conversation>> seqsToConvs = TcpConversationUtils.
+ groupConversationsByPacketSequence(convs, verbose);
+ outer.merge(host, seqsToConvs, (oldMap, newMap) -> {
+ newMap.forEach((sequence, cs) -> oldMap.merge(sequence, cs, (list1, list2) -> {
+ list1.addAll(list2);
+ return list1;
+ }));
+ return oldMap;
+ });
+ });
+ });
+ */
+
+ // ================================================ CLUSTERING ================================================
+ // Note: no need to use the more convoluted on/off maps; can simply use the UserAction->List<Conversation> map
+ // when don't care about hostnames and sequences (see comment earlier).
+ List<Conversation> onConversations = userActionToConversations.entrySet().stream().
+ filter(e -> e.getKey().getType() == Type.TOGGLE_ON). // drop all OFF events from stream
+ map(e -> e.getValue()). // no longer interested in the UserActions
+ flatMap(List::stream). // flatten List<List<T>> to a List<T>
+ collect(Collectors.toList());
+ List<Conversation> offConversations = userActionToConversations.entrySet().stream().
+ filter(e -> e.getKey().getType() == Type.TOGGLE_OFF).
+ map(e -> e.getValue()).
+ flatMap(List::stream).
+ collect(Collectors.toList());
+ List<PcapPacketPair> onPairs = onConversations.stream().
+ map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
+ TcpConversationUtils.extractPacketPairs(c)).
+ flatMap(List::stream). // flatten List<List<>> to List<>
+ collect(Collectors.toList());
+ List<PcapPacketPair> offPairs = offConversations.stream().
+ map(c -> c.isTls() ? TcpConversationUtils.extractTlsAppDataPacketPairs(c) :
+ TcpConversationUtils.extractPacketPairs(c)).
+ flatMap(List::stream). // flatten List<List<>> to List<>
+ collect(Collectors.toList());
+ // Note: need to update the DnsMap of all PcapPacketPairs if we want to use the IP/hostname-sensitive distance.
+ Stream.concat(Stream.of(onPairs), Stream.of(offPairs)).flatMap(List::stream).forEach(p -> p.setDnsMap(dnsMap));
+ // Perform clustering on conversation logged as part of all ON events.
+ DBSCANClusterer<PcapPacketPair> onClusterer = new DBSCANClusterer<>(10.0, 5);
+ List<Cluster<PcapPacketPair>> onClusters = onClusterer.cluster(onPairs);
+ // Perform clustering on conversation logged as part of all OFF events.
+ DBSCANClusterer<PcapPacketPair> offClusterer = new DBSCANClusterer<>(10.0, 5);
+ List<Cluster<PcapPacketPair>> offClusters = offClusterer.cluster(offPairs);
+ // Output clusters
+ System.out.println("========================================");
+ System.out.println(" Clustering results for ON ");
+ System.out.println(" Number of clusters: " + onClusters.size());
+ int count = 0;
+ for (Cluster<PcapPacketPair> c : onClusters) {
+ System.out.println(String.format("<<< Cluster #%02d (%03d points) >>>", ++count, c.getPoints().size()));
+ System.out.print(PrintUtils.toSummaryString(c));
+ }
+ System.out.println("========================================");
+ System.out.println(" Clustering results for OFF ");
+ System.out.println(" Number of clusters: " + offClusters.size());
+ count = 0;
+ for (Cluster<PcapPacketPair> c : offClusters) {
+ System.out.println(String.format("<<< Cluster #%03d (%06d points) >>>", ++count, c.getPoints().size()));
+ System.out.print(PrintUtils.toSummaryString(c));
+ }
+ System.out.println("========================================");
+ // ============================================================================================================
+
+ /*
+ System.out.println("==== ON ====");
+ // Print out all the pairs into a file for ON events
+ File fileOnEvents = new File(onPairsPath);
+ PrintWriter pwOn = null;