#!/usr/bin/env bash

# Usage:
#   ./domains_to_asn.sh domains output
#
# Output format:
#   AS15169;google.com
#   AS8075;microsoft.com

# Positional arguments: the domain list to read and the report file to write.
INPUT="$1"
OUTPUT="$2"

# Both arguments are mandatory; diagnostics go to stderr so they never mix
# with data on stdout.
if [[ -z "$INPUT" || -z "$OUTPUT" ]]; then
    printf 'Usage: %s domains output\n' "$0" >&2
    exit 1
fi

# Validate the input BEFORE touching the output, so a typo in the input path
# does not wipe a report from a previous run.
if [[ ! -r "$INPUT" || -d "$INPUT" ]]; then
    printf '%s: cannot read input file: %s\n' "$0" "$INPUT" >&2
    exit 1
fi

# Truncating the output would destroy the domain list if both arguments name
# the same file.
if [[ "$INPUT" -ef "$OUTPUT" ]]; then
    printf '%s: input and output must be different files\n' "$0" >&2
    exit 1
fi

# Start from an empty output file; stop immediately if it cannot be written,
# since every later append would fail as well.
if ! : > "$OUTPUT"; then
    printf '%s: cannot write output file: %s\n' "$0" "$OUTPUT" >&2
    exit 1
fi

# Succeed if $1 looks like an IPv4 or IPv6 address literal.
# `dig +short` prints CNAME targets (and ";; ..." diagnostics on failure)
# alongside addresses; those lines must not be forwarded to the ASN lookup.
is_ip_address() {
    local candidate=$1
    local ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'
    # Hex digits, colons and dots with at least one colon (covers the
    # IPv4-mapped notation such as ::ffff:192.0.2.1).
    local ipv6_re='^[0-9A-Fa-f:.]*:[0-9A-Fa-f:.]*$'
    [[ $candidate =~ $ipv4_re || $candidate =~ $ipv6_re ]]
}

# Print the ASN reported by whois.cymru.com for IP address $1, or print
# nothing when the lookup fails or the answer is not a plain number (which
# also covers the "NA" placeholder). Always returns 0.
lookup_asn() {
    local ip=$1
    local asn
    # Line 1 of the verbose reply is a header; the first whitespace-separated
    # field of line 2 is taken as the ASN. whois errors are deliberately
    # silenced: an unanswered address is simply not counted.
    asn=$(whois -h whois.cymru.com " -v $ip" 2>/dev/null \
          | awk 'NR==2 {print $1}')
    if [[ $asn =~ ^[0-9]+$ ]]; then
        printf '%s\n' "$asn"
    fi
    return 0
}

# For every domain listed in file $1 (one per line), resolve its A and AAAA
# records, look up the ASN of each address, and append one
# "AS<number>;<domain>" line to file $2 for the most frequent ASN.
# Domains that do not resolve, or for which no ASN is found, are skipped.
# Returns 1 if the input file cannot be read, 0 otherwise.
process_domains() {
    local input=$1
    local output=$2
    local domain ip asn best_asn a
    local -a ips

    if [[ ! -r "$input" ]]; then
        printf 'Cannot read input file: %s\n' "$input" >&2
        return 1
    fi

    # The list is read on fd 3 so stdin stays untouched for child commands.
    # The `|| [[ -n ... ]]` clause keeps a final line lacking a newline.
    while IFS= read -r -u 3 domain || [[ -n "$domain" ]]; do
        # Tolerate CRLF line endings.
        domain=${domain%$'\r'}
        [[ -z "$domain" ]] && continue

        # Collect all IPv4 and IPv6 addresses, dropping non-address lines.
        ips=()
        while IFS= read -r ip; do
            if is_ip_address "$ip"; then
                ips+=("$ip")
            fi
        done < <(
            dig +short A "$domain"
            dig +short AAAA "$domain"
        )

        # Skip domains that don't resolve
        (( ${#ips[@]} == 0 )) && continue

        # Re-declaring with =() resets the tally for each domain.
        local -A asn_count=()

        for ip in "${ips[@]}"; do
            asn=$(lookup_asn "$ip")
            [[ -z "$asn" ]] && continue
            asn_count[$asn]=$(( ${asn_count[$asn]:-0} + 1 ))
        done

        # Skip domains with no ASN results
        (( ${#asn_count[@]} == 0 )) && continue

        # Pick the most frequent ASN. On ties the winner is whichever line
        # `sort -rn` places first.
        best_asn=$(for a in "${!asn_count[@]}"; do
                       printf '%s %s\n' "${asn_count[$a]}" "$a"
                   done | sort -rn | head -n 1 | awk '{print $2}')

        printf 'AS%s;%s\n' "$best_asn" "$domain" >> "$output"
    done 3< "$input"

    return 0
}

process_domains "$INPUT" "$OUTPUT"
