Refactor pcap analysis by introducing IndexedCapture for efficient data handling and querying

Yaro Kasear 2025-04-29 09:36:38 -05:00
parent 24d9783bb9
commit f9837f01e8
2 changed files with 236 additions and 28 deletions
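The second changed file, enrichment/indexed_capture.py, is not shown in the hunks below. A minimal sketch of the interface main() now assumes — a constructor taking the pcapng path, plus a query_metrics method returning the 13-value tuple unpacked later in the diff — might look like the following; everything beyond those two signatures is an assumption, not the actual implementation.

# Hypothetical skeleton only; the real module is the other file changed by this commit.
import pyshark


class IndexedCapture:
    """Parse the pcapng once at construction time; answer per-row queries from memory."""

    def __init__(self, pcapng_path):
        # One pass over the capture, using the same pyshark options the old
        # inline validity check used; extracted fields are kept in memory.
        cap = pyshark.FileCapture(pcapng_path, use_json=True,
                                  include_raw=False, keep_packets=False)
        try:
            for pkt in cap:
                ...  # extract and store whatever fields the metrics need
        finally:
            cap.close()

    def query_metrics(self, start_ts, end_ts, ap_bssid, ap_channel):
        """Return the 13-value tuple unpacked in main():
        (clients_ap, clients_chan, aps_chan,
         avg_signal, strongest_signal, unlinked,
         cisco_avg_reported_clients, cisco_max_reported_clients, num_bssids,
         average_signal, max_ssid_signal, num_channels_ssid,
         packet_count)
        """
        ...  # aggregate over the in-memory index for this window / AP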


@@ -1,6 +1,5 @@
#!/usr/bin/env python3
import argparse
import csv
import pyshark
from statistics import mean
from collections import defaultdict
@@ -20,6 +19,7 @@ from enrichment.csv_handler import (
)
from enrichment.merge_ssid_summaries import merge_ssid_summaries
import time
from enrichment.indexed_capture import IndexedCapture
def parse_args():
parser = argparse.ArgumentParser()
@@ -126,26 +126,12 @@ def analyze_pcap(pcapng_path, start_ts, end_ts, ap_bssid, ap_channel):
def main():
total_start_time = time.perf_counter()
args = parse_args()
cap = pyshark.FileCapture(
args.pcapng,
use_json=True,
include_raw=False,
keep_packets=False
)
# Checking if the pcapng file is valid
count = 0
try:
for packet in cap:
count += 1
if count > 0:
break
except Exception as e:
print(f"[!] Error reading pcapng file: {e}")
return
finally:
cap.close()
# Step 1: Build indexed capture ONCE
print(f"[+] Loading and indexing capture file: {args.pcapng}")
indexed_cap = IndexedCapture(args.pcapng)
# Step 2: Process CSV
rows, original_fields = read_csv_input(args.csv)
fieldnames = original_fields + [
'ClientsOnAP', 'ClientsOnChannel', 'APsOnChannel',
@@ -155,7 +141,6 @@ def main():
]
enriched_rows = []
ssid_summary = None
all_ssid_summaries = []
for row in rows:
@@ -170,17 +155,19 @@
start_time = time.perf_counter()
result = analyze_pcap(args.pcapng, tstart, tend, ap_bssid, ap_channel)
# STEP 3: Query preloaded capture instead of reloading PCAP
result = indexed_cap.query_metrics(tstart, tend, ap_bssid, ap_channel)
(
clients_ap, clients_chan, aps_chan,
avg_signal, strongest_signal, unlinked,
cisco_avg_reported_clients, cisco_max_reported_clients, num_bssids,
average_signal, max_ssid_signal, num_channels_ssid,
ssid_summary, packet_count
packet_count
) = result
elapsed_time = time.perf_counter() - start_time
print(f"[+] Analyzed {ap_bssid} in {elapsed_time:.2f} seconds")
print(f"[+] Queried {ap_bssid} in {elapsed_time:.2f} seconds")
row.update({
'ClientsOnAP': clients_ap,
@@ -199,13 +186,11 @@
})
enriched_rows.append(row)
ssid_summary = result[-2]
all_ssid_summaries.append(ssid_summary)
# Step 4: Save outputs
write_enriched_csv(args.output, fieldnames, enriched_rows)
merged_ssid_summary = merge_ssid_summaries(all_ssid_summaries)
write_ssid_sidecar(args.output, merged_ssid_summary)
# NOTE: SSID summary generation could also come from IndexedCapture later;
# for now, keep using merge_ssid_summaries if needed.
print(f"[+] Enrichment complete: {args.output}")