Refactor run_iperf function to handle timeouts and improve error handling

This commit is contained in:
Yaro Kasear 2025-04-24 09:03:49 -05:00
parent e75b3807f9
commit 4b9ad6f609

View file

@ -1,104 +1,84 @@
#!/bin/bash
# Wireless measurement run: loads settings.env, then samples link statistics,
# speedtest, ping and iperf3 results into a CSV while kismet is capturing.
# set -x

# NOTE(review): `-e` is intentionally not enabled here; failures of individual
# measurements are expected to be handled where they occur. The ERR trap below
# therefore only reports the failing line, it does not stop the script.
set -uo pipefail
trap 'echo "[✖] Execution halted at line $LINENO. Please consult your nearest bash therapist." >&2' ERR
IFS=$'\n\t'

# log MESSAGE — progress message on stderr.
# With the IFS above, "$*" joins multiple arguments with a newline, so callers
# should pass the whole message as a single argument.
log() {
  echo "[+] $*" >&2
}

# warn MESSAGE — warning on stderr.
warn() {
  echo "[!] $*" >&2
}

# die MESSAGE — print a warning and terminate the script with status 1.
die() {
  warn "$*"
  exit 1
}

# settings.env is looked up relative to the current working directory.
source settings.env || die "Could not load settings.env."
FAILURE_LOG="${TEST_FILE%.csv}-failures.log"
SCRIPT_START=$(date +%s)

# Check if email recipient is set (":-" keeps `set -u` from aborting before
# the readable message can be printed).
[[ -n "${RECIPIENT:-}" ]] || die "Please set the RECIPIENT variable in settings.env."
# Prime the sudo credential cache, then refresh it every 60 seconds in the
# background so later sudo calls do not prompt in the middle of a run.
sudo -v
while true; do
  sudo -n true
  sleep 60
done 2>/dev/null &
SUDO_KEEPALIVE_PID=$!
# Stop the keep-alive loop on every exit path, including die(). The `|| true`
# keeps the ERR trap quiet when the loop has already been killed.
trap 'kill "$SUDO_KEEPALIVE_PID" 2>/dev/null || true' EXIT

log "Starting kismet..."
SECONDS=0
sudo systemctl start kismet
log "Kismet startup took $SECONDS seconds"

# Let the capture run for LEAD_TIME before the first sample is taken.
log "Saturating the capture..."
sleep "$LEAD_TIME"
#######################################
# Print the current "tx failed" counter of the wireless interface.
# Globals:   INTERFACE (read)
# Outputs:   exactly one integer on stdout: the sum of the "tx failed"
#            values over all stations reported by `iw ... station dump`,
#            or 0 when no such line is reported. A single value keeps the
#            result usable in shell arithmetic.
#######################################
get_tx_failed() {
  iw dev "$INTERFACE" station dump | awk '/tx failed/ {total += $3} END {print total + 0}'
}
#######################################
# Convert a Wi-Fi centre frequency in MHz to its channel number.
# Arguments: $1 - frequency in MHz (decimal digits only)
# Outputs:   the channel number on stdout, printed once, or "Unknown" when
#            the frequency is missing, non-numeric, or outside the
#            2412-2472, 2484 and 5180-5825 MHz ranges handled here.
#######################################
freq_to_channel() {
  local freq=${1:-}

  # Reject empty or non-numeric input before it reaches arithmetic context.
  if [[ ! "$freq" =~ ^[0-9]+$ ]]; then
    echo "Unknown"
    return 0
  fi
  # Force base 10 so a leading zero is not interpreted as octal.
  freq=$((10#$freq))

  if ((freq >= 2412 && freq <= 2472)); then
    echo "$(((freq - 2407) / 5))"
  elif ((freq == 2484)); then
    echo 14
  elif ((freq >= 5180 && freq <= 5825)); then
    echo "$(((freq - 5000) / 5))"
  else
    echo "Unknown"
  fi
}
# run_iperf TARGET MODE DIRECTION
# Runs an iperf3 client against TARGET with JSON output, then extracts a
# bits-per-second figure from the JSON with jq.
#   TARGET     - passed to iperf3 via -c
#   MODE       - "udp" adds -u and -b "$BANDWIDTH"; other values add nothing
#   DIRECTION  - "down" adds --reverse; other values add nothing
# Reads IPERF_DURATION, BANDWIDTH and FAILURE_LOG from the enclosing script.
# On the "failed or timed out" path it prints 0 on stdout and returns.
# When jq yields one of the sentinel strings, a timestamped line and the
# captured stderr are appended to FAILURE_LOG.
# NOTE(review): this span shows removed and added lines of the commit side by
# side, so several statements appear twice, and the part of the body between
# the FAILURE_LOG lines and the final rm is not visible in this view. What is
# printed on the success path cannot be confirmed from here.
# NOTE(review): the early return on the timeout path skips the rm -f at the
# end of the function, so both temp files stay behind on that path, and
# nothing is written to FAILURE_LOG there. Confirm whether that is intended.
# NOTE(review): timestamp is assigned without local, so it is visible to the
# caller after the function returns.
run_iperf() {
local target="$1"
local mode="$2"
local direction="$3"
local args=("-c" "$target" "-J" "-t" "10")
local target=$1 mode=$2 direction=$3
local args=("-c" "$target" "-J" "-t" "$IPERF_DURATION")
[ "$mode" = "udp" ] && args+=("-u" "-b" "$BANDWIDTH")
[ "$direction" = "down" ] && args+=("--reverse")
if [ "$mode" = "udp" ]; then
args+=("-u" "-b" "$BANDWIDTH")
fi
# NOTE(review): combining local with a command substitution hides a mktemp
# failure, because the status of local is what the shell sees.
local tmp_err=$(mktemp) tmp_json=$(mktemp)
# The timeout allows 20 seconds on top of the configured test duration.
local timeout_duration=$((IPERF_DURATION + 20))
if [ "$direction" = "down" ]; then
args+=("--reverse")
fi
local tmp_err
tmp_err=$(mktemp)
local tmp_json
tmp_json=$(mktemp)
sleep 1s
log "Running iperf3 $mode $direction to $target..."
SECONDS=0
echo "Running iperf3 $mode $direction to $target..." >&2
iperf3 "${args[@]}" -J >"$tmp_json" 2>"$tmp_err"
echo "This took $SECONDS seconds." >&2
# JSON goes to tmp_json and stderr to tmp_err; a non-zero status from timeout
# or iperf3 takes the early-return branch below.
if ! timeout "${timeout_duration}s" iperf3 "${args[@]}" >"$tmp_json" 2>"$tmp_err"; then
warn "iperf3 $mode $direction to $target failed or timed out"
echo "0"
return
fi
log "iperf3 completed in $SECONDS seconds"
echo "Parsing iperf3 output..." >&2
SECONDS=0
local parsed
parsed=$(jq -r '
if .error then
"iperf3-error"
elif has("end") | not then
"no-end"
elif .end | has("sum_received") then
.end.sum_received.bits_per_second
elif .end | has("sum") then
.end.sum.bits_per_second
else
"unexpected-format"
end' "$tmp_json" || echo "execution-failed")
echo "This took $SECONDS seconds." >&2
if .error then "iperf3-error"
elif has("end") | not then "no-end"
elif .end | has("sum_received") then .end.sum_received.bits_per_second
elif .end | has("sum") then .end.sum.bits_per_second
else "unexpected-format" end' "$tmp_json" 2>/dev/null || echo "execution-failed")
if [[ "$parsed" == "iperf3-error" || "$parsed" == "no-end" || "$parsed" == "unexpected-format" || "$parsed" == "execution-failed" ]]; then
log "Parsed iperf3 result in $SECONDS seconds"
if [[ "$parsed" =~ ^(iperf3-error|no-end|unexpected-format|execution-failed)$ ]]; then
timestamp=$(date -Iseconds)
echo "$timestamp,iperf $mode $direction to $target failed with '$parsed'" >>"$FAILURE_LOG"
echo "[stderr] $(cat "$tmp_err")" >>"$FAILURE_LOG"
@ -111,149 +91,101 @@ run_iperf() {
rm -f "$tmp_err" "$tmp_json"
}
# Send start email (once). printf is used instead of `echo -e` so that the
# message layout does not depend on echo's escape handling.
SECONDS=0
if printf 'Subject: Test %s Started\n\nTest %s has commenced.\n' "$BOOT_ID" "$BOOT_ID" | msmtp "$RECIPIENT"; then
  log "Start email sent in $SECONDS seconds"
else
  warn "Start email could not be sent"
fi

# Baseline for the per-sample TX failure delta computed in the test loop.
FAILED_START=$(get_tx_failed)

# CSV setup: the header is written only when the results file does not exist
# yet, so an existing results file is appended to rather than overwritten.
if [[ ! -f "$TEST_FILE" ]]; then
  echo "StartTimestamp,EndTimestamp,Link,Level,Noise,BSSID,TX Bitrate,RX Bitrate,$(speedtest --csv-header),TX Failures,Channel,Frequency,Packet Loss,Jitter,LocalTCPUp,LocalTCPDown,LocalUDPUp,LocalUDPDown,RemoteTCPUp,RemoteTCPDown,RemoteUDPUp,RemoteUDPDown" >"$TEST_FILE"
fi
# Main measurement loop: NUM_TESTS runs of NUM_SAMPLES samples each. Every
# sample appends one row to TEST_FILE; a sample whose speedtest never succeeds
# is recorded in FAILURE_LOG and skipped.
for ((COUNTER = 1; COUNTER <= NUM_TESTS; COUNTER++)); do
  log "Test run $COUNTER of $NUM_TESTS"

  for ((i = 1; i <= NUM_SAMPLES; i++)); do
    log " Sample $i of $NUM_SAMPLES"
    START_TIME=$(date -Iseconds)

    # Wireless stats. NOTE(review): NR==3 reads the third line of
    # /proc/net/wireless, i.e. presumably the first listed interface — confirm
    # this is always $INTERFACE on the probe.
    link_level_noise=$(awk 'NR==3 {gsub(/\./, "", $3); gsub(/\./, "", $4); gsub(/\./, "", $5); print $3","$4","$5}' /proc/net/wireless)
    bssid_and_bitrate=$(iw dev "$INTERFACE" link | awk '/Connected/ {bssid=$3} /tx bitrate/ {tx=$3} /rx bitrate/ {rx=$3} END {print bssid","tx","rx}')

    # Speedtest with retries; an empty result counts as a failed attempt.
    speed_results=""
    for ((retry = 1; retry <= MAX_RETRIES; retry++)); do
      SECONDS=0
      log " Speed test attempt $retry"
      speed_results=$(speedtest --secure --csv 2>/dev/null || true)
      log " Speed test took $SECONDS seconds"
      if [[ -n "$speed_results" ]]; then
        break
      fi
      # No point in waiting after the final attempt.
      if ((retry < MAX_RETRIES)); then
        warn " Speedtest failed. Retrying in $RETRY_DELAY seconds..."
        sleep "$RETRY_DELAY"
      fi
    done

    if [[ -z "$speed_results" ]]; then
      timestamp=$(date -Iseconds)
      warn " Speedtest permanently failed. Skipping sample."
      echo "$timestamp,Test $COUNTER,Sample $i" >>"$FAILURE_LOG"
      continue
    fi

    SECONDS=0
    log " Gathering TX failed count..."
    FAILED_NOW=$(get_tx_failed)
    # Only do arithmetic on plain integers: an empty or multi-line counter
    # would otherwise raise an arithmetic error inside the loop.
    if [[ "$FAILED_NOW" =~ ^[0-9]+$ && "$FAILED_START" =~ ^[0-9]+$ ]]; then
      FAILED_DELTA=$((FAILED_NOW - FAILED_START))
    else
      warn " TX failed counter unavailable; leaving the field empty."
      FAILED_DELTA=""
    fi
    if [[ "$FAILED_NOW" =~ ^[0-9]+$ ]]; then
      FAILED_START=$FAILED_NOW
    fi
    log " TX count gathered in $SECONDS seconds"

    freq=$(iw dev "$INTERFACE" link | awk '/freq:/ {print $2}')
    channel=$(freq_to_channel "$freq")

    # One ping run feeds both the packet-loss and the jitter figure.
    SECONDS=0
    log " Running ping test..."
    ping_output=$(ping -c "$PING_COUNT" "$PING_TARGET") ||
      warn " Ping to $PING_TARGET reported errors"
    # The percentage may be fractional (e.g. "33.3333% packet loss"), so the
    # dot is part of the match; `|| true` keeps a non-match from firing ERR.
    packet_loss=$(grep -oP '[0-9.]+(?=% packet loss)' <<<"$ping_output" || true)
    # Jitter = population standard deviation of the per-reply "time=" values.
    # `^` is the portable awk power operator; the variance is clamped at 0 to
    # absorb floating-point rounding.
    jitter=$(awk '
      /time=/ {
        rtt = $(NF-1)
        gsub(/time=/, "", rtt)
        sum += rtt
        sumsq += rtt * rtt
        n++
      }
      END {
        if (n > 1) {
          variance = sumsq / n - (sum / n) ^ 2
          if (variance < 0) variance = 0
          print sqrt(variance)
        } else {
          print 0
        }
      }' <<<"$ping_output")
    log " Ping test took $SECONDS seconds"

    log " Running iperf3 tests..."
    LocalTCPUp=$(run_iperf "$IPERF_LOCAL_TARGET" tcp up)
    LocalTCPDown=$(run_iperf "$IPERF_LOCAL_TARGET" tcp down)
    LocalUDPUp=$(run_iperf "$IPERF_LOCAL_TARGET" udp up)
    LocalUDPDown=$(run_iperf "$IPERF_LOCAL_TARGET" udp down)
    RemoteTCPUp=$(run_iperf "$IPERF_REMOTE_TARGET" tcp up)
    RemoteTCPDown=$(run_iperf "$IPERF_REMOTE_TARGET" tcp down)
    RemoteUDPUp=$(run_iperf "$IPERF_REMOTE_TARGET" udp up)
    RemoteUDPDown=$(run_iperf "$IPERF_REMOTE_TARGET" udp down)

    END_TIME=$(date -Iseconds)
    echo "$START_TIME,$END_TIME,$link_level_noise,$bssid_and_bitrate,$speed_results,$FAILED_DELTA,$channel,$freq,$packet_loss,$jitter,$LocalTCPUp,$LocalTCPDown,$LocalUDPUp,$LocalUDPDown,$RemoteTCPUp,$RemoteTCPDown,$RemoteUDPUp,$RemoteUDPDown" >>"$TEST_FILE"
  done

  # Pause between runs, but not after the last one.
  if ((COUNTER < NUM_TESTS)); then
    log "Waiting $TIME_BETWEEN before next test..."
    sleep "$TIME_BETWEEN"
  fi
done
log "Stopping kismet..."
sudo systemctl stop kismet

# Enrich the CSV with passive metrics from the most recently modified
# .pcapng file under KISMET_LOG_DIR.
log "Enriching data..."
KISMET_LOG=$(find "$KISMET_LOG_DIR" -type f -name "*.pcapng" -printf "%T@ %p\n" | sort -n | tail -1 | cut -d' ' -f2-)
if [[ -z "$KISMET_LOG" || ! -f "$KISMET_LOG" ]]; then
  die "Packet capture not found."
fi

SECONDS=0
python3 "$SCRIPT_DIRECTORY/enrich.py" --csv "$TEST_FILE" --pcapng "$KISMET_LOG" --output "$ENRICHED_FILE" ||
  warn "Enrichment script exited with an error"
log "Enrichment took $SECONDS seconds"
# Final email with attachment(s)
EMAIL_BODY="The test with UID ${BOOT_ID} is complete. Please collect the probe. Data is attached."
EMAIL_SUBJECT="Test ${BOOT_ID} Complete"

# Collect the result files that actually exist. SSID_METRICS_FILE is treated
# as optional so `set -u` does not abort when it is not configured.
ATTACHMENTS=()
for candidate in "$ENRICHED_FILE" "$FAILURE_LOG" "${SSID_METRICS_FILE:-}"; do
  if [[ -n "$candidate" && -f "$candidate" ]]; then
    ATTACHMENTS+=("$candidate")
  fi
done

# Check if there's at least one file to send
if ((${#ATTACHMENTS[@]} == 0)); then
  warn "No files to attach. Email not sent."
else
  # Each file becomes two separate words, "-a" and the path. Prefixing the
  # path with "-a " in a single expansion would hand mutt one fused argument.
  ATTACHMENT_FLAGS=()
  for file in "${ATTACHMENTS[@]}"; do
    log "Attaching: $file"
    ATTACHMENT_FLAGS+=("-a" "$file")
  done

  if echo "$EMAIL_BODY" | mutt -s "$EMAIL_SUBJECT" "${ATTACHMENT_FLAGS[@]}" -- "$RECIPIENT"; then
    log "Email sent to $RECIPIENT with attachments."
  else
    warn "Failed to send email to $RECIPIENT."
  fi
fi

# Stop the background sudo keep-alive loop and report the total runtime.
sudo kill "$SUDO_KEEPALIVE_PID"
SCRIPT_END=$(date +%s)
log "Full test cycle completed in $((SCRIPT_END - SCRIPT_START)) seconds"