Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions modules/core.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1594,8 +1594,8 @@ function generate_consolidated_report() {
# Top assets from hotlist (if present)
if [[ -s hotlist.txt ]] && command -v jq >/dev/null 2>&1; then
top_assets_json=$(head -n "${HOTLIST_TOP:-50}" hotlist.txt \
| awk '{score=$1;$1=""; sub(/^ /,"",$0); printf "{\"asset\":\"%s\",\"score\":%s}\n",$0,score}' \
| jq -s '.')
| awk '{score=$1; $1=""; sub(/^ /,"",$0); print score "\t" $0}' \
| jq -Rn '[inputs | split("\t") | {asset: .[1], score: (.[0] | tonumber? // 0)}]')
else
top_assets_json="[]"
fi
Expand All @@ -1611,10 +1611,10 @@ function generate_consolidated_report() {
| awk -F'] ' '{
ts=$1; gsub(/^\[/,"",ts);
msg=$2;
if (msg ~ /Start function:/) { print "{\"timestamp\":\"" ts "\",\"level\":\"INFO\",\"function\":\"" msg "\",\"message\":\"started\"}" }
else if (msg ~ /End function:/) { print "{\"timestamp\":\"" ts "\",\"level\":\"SUCCESS\",\"function\":\"" msg "\",\"message\":\"completed\"}" }
if (msg ~ /Start function:/) { print ts "\tINFO\t" msg "\tstarted" }
else if (msg ~ /End function:/) { print ts "\tSUCCESS\t" msg "\tcompleted" }
}' \
| jq -s '.')
| jq -Rn '[inputs | split("\t") | {timestamp: .[0], level: .[1], function: .[2], message: .[3]}]')
else
timeline_json="[]"
fi
Expand Down
13 changes: 13 additions & 0 deletions modules/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,19 @@ function sanitize_ip() {
return 0
}

# Sanitize one raw entry read from a -l list file.
# IPv4 addresses (optionally carrying a /CIDR suffix) are routed through
# sanitize_ip; every other entry is treated as a domain name.
# Prints the sanitized value on stdout; a non-zero status means "discard".
# Usage: domain=$(_sanitize_list_entry "$raw") || continue
_sanitize_list_entry() {
    local entry="$1"
    local ipv4_cidr_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+(/[0-9]+)?$'

    if [[ "$entry" =~ $ipv4_cidr_re ]]; then
        sanitize_ip "$entry"
        return
    fi
    sanitize_domain "$entry"
}

###############################################################################################################
####################################### SECURITY CHECKS #######################################################
###############################################################################################################
Expand Down
132 changes: 96 additions & 36 deletions modules/web.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,44 @@ _run_httpx() {
fi
}

# Drop a leading "*." wildcard label from URL-ish lines, keeping any
# http(s) scheme intact.
# Examples:
#   *.api.example.com         -> api.example.com
#   https://*.api.example.com -> https://api.example.com
_normalize_probe_urls() {
    sed -E \
        -e 's#^(https?://)\*\.#\1#' \
        -e 's#^\*\.##'
}

# Heuristic JSONL detector for probe output files.
# Succeeds (status 0) when the first non-blank line of <input_file>
# starts with "{" after optional leading whitespace; fails otherwise
# (including for missing, empty, or all-blank files).
_probe_output_is_json() {
    local probe_file="$1"
    local head_line

    # First line containing any non-whitespace character; empty string
    # when the file is unreadable, empty, or blank-only.
    head_line="$(grep -m1 '[^[:space:]]' "$probe_file" 2>/dev/null || true)"
    [[ "$head_line" =~ ^[[:space:]]*\{ ]]
}

# Pull in-scope http(s) URLs out of a probe result file and append the
# deduplicated set to <output_file>. The input may be httpx JSONL (URLs
# taken from the .url field) or an already-plain URL list.
# Usage: _extract_probe_urls <input_file> <domain_filter> <output_file>
_extract_probe_urls() {
    local src_file="$1"
    local scope="$2"
    local dest_file="$3"

    # Nothing to do for a missing or empty probe file.
    [[ ! -s "$src_file" ]] && return 0

    # Emit one candidate URL per line regardless of input format, then keep
    # only in-scope http(s) entries, strip wildcard prefixes, and dedupe.
    if _probe_output_is_json "$src_file"; then
        jq -r 'try (.url // empty)' "$src_file" 2>/dev/null
    else
        cat "$src_file" 2>/dev/null
    fi \
        | awk -v dom="$scope" 'index($0, dom) && $0 ~ /^https?:\/\// {print}' \
        | _normalize_probe_urls \
        | anew_q_safe "$dest_file"
}

# Process httpx JSON output: extract URLs and web info
# Usage: _process_httpx_output json_file url_output info_output
_process_httpx_output() {
Expand All @@ -51,7 +89,7 @@ _process_httpx_output() {
jq -r 'try .url' "$json_file" 2>/dev/null \
| grep "$domain" \
| grep -aEo 'https?://[^ ]+' \
| sed 's/*.//' \
| _normalize_probe_urls \
| anew_q_safe "$url_output"

# Extract plain web info
Expand Down Expand Up @@ -126,11 +164,12 @@ function webprobe_simple() {

# webprobe_simple is expected to write JSONL when using httpx -json.
# Some runners (or wrappers) may produce a plain URL list instead.
# Detect the format early to avoid jq parse errors and missing webs/webs.txt.
local probe_first_line probe_is_json
probe_first_line="$(awk 'NF {print; exit}' .tmp/web_full_info_probe.txt 2>/dev/null || true)"
local probe_is_json probe_input_lines urls_extracted
probe_is_json=false
[[ "$probe_first_line" =~ ^[[:space:]]*\\{ ]] && probe_is_json=true
if _probe_output_is_json ".tmp/web_full_info_probe.txt"; then
probe_is_json=true
fi
probe_input_lines=$(awk 'NF {c++} END {print c+0}' .tmp/web_full_info_probe.txt 2>/dev/null)

# Always start fresh for this run (used by urlchecks diff too).
: >.tmp/probed_tmp.txt 2>/dev/null || true
Expand All @@ -141,7 +180,7 @@ function webprobe_simple() {
if ! cat .tmp/web_full_info_probe.txt .tmp/web_full_info.txt 2>>"$LOGFILE" \
| jq -cs 'unique_by(.input)[]' 2>>"$LOGFILE" >webs/web_full_info.txt; then
log_note "webprobe_simple: failed to merge httpx JSON; falling back to probe-only" "${FUNCNAME[0]}" "${LINENO}"
awk 'match($0, /^[[:space:]]*\\{/) {print}' .tmp/web_full_info_probe.txt >.tmp/web_full_info_merge_input.jsonl 2>/dev/null || true
awk 'match($0, /^[[:space:]]*\{/) {print}' .tmp/web_full_info_probe.txt >.tmp/web_full_info_merge_input.jsonl 2>/dev/null || true
if [[ -s ".tmp/web_full_info_merge_input.jsonl" ]]; then
jq -cs 'unique_by(.input)[]' .tmp/web_full_info_merge_input.jsonl 2>>"$LOGFILE" >webs/web_full_info.txt || : >webs/web_full_info.txt
else
Expand All @@ -150,20 +189,20 @@ function webprobe_simple() {
fi
# Keep cache as JSONL for later merges.
cp webs/web_full_info.txt .tmp/web_full_info.txt 2>/dev/null || true

# Extract URLs from JSONL
if [[ -s "webs/web_full_info.txt" ]]; then
jq -r 'try (.url // empty)' webs/web_full_info.txt 2>/dev/null \
| awk -v dom="$domain" 'index($0, dom) && $0 ~ /^https?:\\/\\// {print}' \
| sed 's/*.//' | anew_q_safe .tmp/probed_tmp.txt
fi
else
log_note "webprobe_simple: probe output not JSON; treating as URL list" "${FUNCNAME[0]}" "${LINENO}"
if [[ -s ".tmp/web_full_info_probe.txt" ]]; then
awk -v dom="$domain" 'index($0, dom) && $0 ~ /^https?:\\/\\// {print}' .tmp/web_full_info_probe.txt 2>/dev/null \
| sed 's/*.//' | anew_q_safe .tmp/probed_tmp.txt
fi
fi
_extract_probe_urls ".tmp/web_full_info_probe.txt" "$domain" ".tmp/probed_tmp.txt" || true
urls_extracted=$(awk 'NF {c++} END {print c+0}' .tmp/probed_tmp.txt 2>/dev/null)

# Fallback: if extraction from probe output produced nothing, try cached JSON.
if [[ "${urls_extracted:-0}" -eq 0 ]] && [[ -s ".tmp/web_full_info.txt" ]] && _probe_output_is_json ".tmp/web_full_info.txt"; then
_extract_probe_urls ".tmp/web_full_info.txt" "$domain" ".tmp/probed_tmp.txt" || true
urls_extracted=$(awk 'NF {c++} END {print c+0}' .tmp/probed_tmp.txt 2>/dev/null)
log_note "webprobe_simple: fallback to .tmp/web_full_info.txt urls_extracted=${urls_extracted}" "${FUNCNAME[0]}" "${LINENO}"
fi

log_note "webprobe_simple: probe_input_lines=${probe_input_lines} urls_extracted=${urls_extracted:-0} probe_is_json=${probe_is_json}" "${FUNCNAME[0]}" "${LINENO}"

# Adaptive throttling heuristics: mark slow hosts (429/403) from httpx
if [[ -s "webs/web_full_info.txt" ]]; then
Expand Down Expand Up @@ -253,26 +292,47 @@ function webprobe_full() {
fi
fi

# Process web_full_info_uncommon.txt
if [[ -s ".tmp/web_full_info_uncommon.txt" ]]; then
# Extract URLs
jq -r 'try .url' .tmp/web_full_info_uncommon.txt 2>/dev/null \
| grep "$domain" \
| grep -aEo 'https?://[^ ]+' \
| sed 's/*.//' \
| anew_q_safe .tmp/probed_uncommon_ports_tmp.txt
# Process web_full_info_uncommon.txt
if [[ -s ".tmp/web_full_info_uncommon.txt" ]]; then
local uncommon_is_json uncommon_input_lines uncommon_urls_extracted
uncommon_is_json=false
if _probe_output_is_json ".tmp/web_full_info_uncommon.txt"; then
uncommon_is_json=true
fi
uncommon_input_lines=$(awk 'NF {c++} END {print c+0}' .tmp/web_full_info_uncommon.txt 2>/dev/null)

# Extract plain web info
jq -r 'try . | "\(.url) [\(.status_code)] [\(.title)] [\(.webserver)] \(.tech)"' .tmp/web_full_info_uncommon.txt \
| grep "$domain" \
| anew_q_safe webs/web_full_info_uncommon_plain.txt
: >.tmp/probed_uncommon_ports_tmp.txt 2>/dev/null || true
_extract_probe_urls ".tmp/web_full_info_uncommon.txt" "$domain" ".tmp/probed_uncommon_ports_tmp.txt" || true

# Update webs_full_info_uncommon.txt based on whether domain is IP
if [[ $domain =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
cat .tmp/web_full_info_uncommon.txt 2>>"$LOGFILE" | anew_q_safe webs/web_full_info_uncommon.txt
else
grep "$domain" .tmp/web_full_info_uncommon.txt | anew_q_safe webs/web_full_info_uncommon.txt
fi
if [[ "$uncommon_is_json" != true ]]; then
log_note "webprobe_full: probe output not JSON; treating as URL list" "${FUNCNAME[0]}" "${LINENO}"
awk -v dom="$domain" 'index($0, dom) && $0 ~ /^https?:\/\// {print}' .tmp/web_full_info_uncommon.txt 2>/dev/null \
| _normalize_probe_urls \
| anew_q_safe webs/web_full_info_uncommon.txt
fi

if [[ "$uncommon_is_json" == true ]]; then
# Extract plain web info
jq -r 'try . | "\(.url) [\(.status_code)] [\(.title)] [\(.webserver)] \(.tech)"' .tmp/web_full_info_uncommon.txt \
| grep "$domain" \
| anew_q_safe webs/web_full_info_uncommon_plain.txt

# Update webs_full_info_uncommon.txt based on whether domain is IP
if [[ $domain =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
cat .tmp/web_full_info_uncommon.txt 2>>"$LOGFILE" | anew_q_safe webs/web_full_info_uncommon.txt
else
grep "$domain" .tmp/web_full_info_uncommon.txt | anew_q_safe webs/web_full_info_uncommon.txt
fi
fi
uncommon_urls_extracted=$(awk 'NF {c++} END {print c+0}' .tmp/probed_uncommon_ports_tmp.txt 2>/dev/null)

# Fallback: try prior uncommon cache when current extraction yields nothing.
if [[ "${uncommon_urls_extracted:-0}" -eq 0 ]] && [[ -s "webs/web_full_info_uncommon.txt" ]]; then
_extract_probe_urls "webs/web_full_info_uncommon.txt" "$domain" ".tmp/probed_uncommon_ports_tmp.txt" || true
uncommon_urls_extracted=$(awk 'NF {c++} END {print c+0}' .tmp/probed_uncommon_ports_tmp.txt 2>/dev/null)
log_note "webprobe_full: fallback to webs/web_full_info_uncommon.txt urls_extracted=${uncommon_urls_extracted}" "${FUNCNAME[0]}" "${LINENO}"
fi
log_note "webprobe_full: probe_input_lines=${uncommon_input_lines} urls_extracted=${uncommon_urls_extracted:-0} probe_is_json=${uncommon_is_json}" "${FUNCNAME[0]}" "${LINENO}"

# Count new websites
if ! NUMOFLINES=$(anew_safe webs/webs_uncommon_ports.txt <.tmp/probed_uncommon_ports_tmp.txt | sed '/^$/d' | wc -l); then
Expand Down Expand Up @@ -2007,7 +2067,7 @@ function wordlist_gen() {

start_func "${FUNCNAME[0]}" "Wordlist Generation"

[[ -s ".tmp/url_extract_tmp.txt" ]] && cat webs/url_extract.txt | anew -q .tmp/url_extract_tmp.txt || true
[[ -s ".tmp/url_extract_tmp.txt" ]] && [[ -s "webs/url_extract.txt" ]] && cat webs/url_extract.txt | anew -q .tmp/url_extract_tmp.txt || true
# Ensure url_extract_tmp.txt exists and is not empty
if [[ -s ".tmp/url_extract_tmp.txt" ]]; then
# Define patterns for keys and values
Expand Down
7 changes: 7 additions & 0 deletions reconftw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ while true; do
fi
while IFS= read -r t; do
[[ -z "$t" ]] && continue
t=$(_sanitize_list_entry "$t") || continue
ipcidr_target "$t" "$list"
done <"$list"
shift 2
Expand Down Expand Up @@ -621,6 +622,7 @@ case $opt_mode in
sed_i 's/\r$//' "$flist"
while IFS= read -r domain <&3; do
[[ -z "$domain" ]] && continue
domain=$(_sanitize_list_entry "$domain") || continue
start
recon
end
Expand All @@ -642,6 +644,7 @@ case $opt_mode in
sed_i 's/\r$//' "$flist"
while IFS= read -r domain <&3; do
[[ -z "$domain" ]] && continue
domain=$(_sanitize_list_entry "$domain") || continue
subs_menu
done 3<"$flist"
else
Expand All @@ -656,6 +659,7 @@ case $opt_mode in
sed_i 's/\r$//' "$flist"
while IFS= read -r domain <&3; do
[[ -z "$domain" ]] && continue
domain=$(_sanitize_list_entry "$domain") || continue
passive
done 3<"$flist"
else
Expand All @@ -671,6 +675,7 @@ case $opt_mode in
sed_i 's/\r$//' "$flist"
while IFS= read -r domain <&3; do
[[ -z "$domain" ]] && continue
domain=$(_sanitize_list_entry "$domain") || continue
all
done 3<"$flist"
else
Expand Down Expand Up @@ -702,6 +707,7 @@ case $opt_mode in
sed_i 's/\r$//' "$flist"
while IFS= read -r domain <&3; do
[[ -z "$domain" ]] && continue
domain=$(_sanitize_list_entry "$domain") || continue
start
osint
end
Expand All @@ -720,6 +726,7 @@ case $opt_mode in
sed_i 's/\r$//' "$flist"
while IFS= read -r domain <&3; do
[[ -z "$domain" ]] && continue
domain=$(_sanitize_list_entry "$domain") || continue
zen_menu
done 3<"$flist"
else
Expand Down
94 changes: 94 additions & 0 deletions tests/unit/test_webprobe_full_formats.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env bats

# Per-test fixture: builds an isolated project sandbox, points reconftw's
# globals at it, puts a mock-bin directory first on PATH, and sources
# reconftw.sh in --source-only mode so its functions can be called directly.
setup() {
    local project_root
    project_root="$(cd "$(dirname "$BATS_TEST_FILENAME")/../.." && pwd)"
    export SCRIPTPATH="$project_root"
    export LOGFILE="/dev/null"
    # Neutralize color escape variables so output assertions stay plain text.
    export bred='' bblue='' bgreen='' byellow='' yellow='' reset=''

    # Scratch area plus the per-target working-dir layout reconftw expects.
    export TEST_DIR="$BATS_TEST_TMPDIR/reconftw_webprobe_full"
    mkdir -p "$TEST_DIR"
    export dir="$TEST_DIR/example.com"
    export called_fn_dir="$dir/.called_fn"
    mkdir -p "$called_fn_dir" "$dir"
    cd "$dir"

    # Mock binaries (e.g. httpx) placed here shadow the real tools.
    export MOCK_BIN="$TEST_DIR/mockbin"
    mkdir -p "$MOCK_BIN"
    export PATH="$MOCK_BIN:$PATH"

    source "$project_root/reconftw.sh" --source-only
    # Minimal flag/config surface needed by webprobe_full.
    export domain="example.com"
    export DIFF=false
    export AXIOM=false
    export WEBPROBEFULL=true
    export PROXY=false
    export UNCOMMON_PORTS_WEB="8080,8443"
    export HTTPX_UNCOMMONPORTS_THREADS=10
    export HTTPX_UNCOMMONPORTS_TIMEOUT=10
}

# Remove the per-test scratch directory.
# Guarded so teardown always returns 0: the original
# `[[ -d "$TEST_DIR" ]] && rm -rf "$TEST_DIR"` form returns 1 when the
# directory is already gone, and bats treats a non-zero teardown as a test
# failure. The ${TEST_DIR:-} check also prevents an unset/empty variable
# from ever feeding `rm -rf` a dangerous path.
teardown() {
    if [[ -n "${TEST_DIR:-}" && -d "$TEST_DIR" ]]; then
        rm -rf -- "$TEST_DIR"
    fi
}

# End-to-end: webprobe_full must accept plain URL-list (non-JSON) httpx
# output, strip wildcard "*." prefixes, and populate both the uncommon-port
# web list and the aggregated webs_all.txt.
@test "webprobe_full accepts URL-list output and updates uncommon/webs_all targets" {
    mkdir -p .tmp webs subdomains
    printf "a.example.com\n" > subdomains/subdomains.txt

    # Mock httpx: ignores every flag except -o and writes a fixed plain URL
    # list (one wildcard-prefixed host) to the requested output file.
    cat > "$MOCK_BIN/httpx" <<'SH'
#!/usr/bin/env bash
out=""
while [[ $# -gt 0 ]]; do
    case "$1" in
        -o)
            out="$2"
            shift 2
            ;;
        *)
            shift
            ;;
    esac
done
printf '%s\n' "https://*.edge.example.com:8443" "https://api.example.com:8080" > "$out"
SH
    chmod +x "$MOCK_BIN/httpx"

    run webprobe_full
    [ "$status" -eq 0 ]
    # Wildcard prefix must be normalized away; both probed hosts kept.
    [ -s "webs/webs_uncommon_ports.txt" ]
    grep -q "https://edge.example.com:8443" "webs/webs_uncommon_ports.txt"
    grep -q "https://api.example.com:8080" "webs/webs_uncommon_ports.txt"
    # Aggregated target list must also receive the normalized URL.
    [ -s "webs/webs_all.txt" ]
    grep -q "https://edge.example.com:8443" "webs/webs_all.txt"
}

# Fallback path: when the current probe yields no in-scope URLs, webprobe_full
# must recover targets from the cached webs/web_full_info_uncommon.txt file.
@test "webprobe_full falls back to cached uncommon output when current extraction is empty" {
    mkdir -p .tmp webs subdomains
    printf "a.example.com\n" > subdomains/subdomains.txt
    # Pre-seed the cache with an in-scope URL from a previous run.
    printf '%s\n' "https://cached.example.com:8443" > webs/web_full_info_uncommon.txt

    # Mock httpx: only emits an out-of-scope host, so the current-run
    # extraction produces nothing and the fallback must kick in.
    cat > "$MOCK_BIN/httpx" <<'SH'
#!/usr/bin/env bash
out=""
while [[ $# -gt 0 ]]; do
    case "$1" in
        -o)
            out="$2"
            shift 2
            ;;
        *)
            shift
            ;;
    esac
done
printf '%s\n' "https://not-in-scope.invalid:8443" > "$out"
SH
    chmod +x "$MOCK_BIN/httpx"

    run webprobe_full
    [ "$status" -eq 0 ]
    # The cached in-scope URL must survive into the uncommon-ports list.
    [ -s "webs/webs_uncommon_ports.txt" ]
    grep -q "https://cached.example.com:8443" "webs/webs_uncommon_ports.txt"
}
Loading