fetch-tlds.sh 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816
  1. #!/usr/bin/env bash
  2. # fetch-tlds.sh — Scrape purchasable TLD lists from registrar APIs
  3. # Outputs clean sorted lists for use in Lists.toml
  4. #
  5. # Usage:
  6. # ./scripts/fetch-tlds.sh # fetch all, show summary
  7. # ./scripts/fetch-tlds.sh porkbun # porkbun only
  8. # ./scripts/fetch-tlds.sh inwx # inwx only
  9. # ./scripts/fetch-tlds.sh --raw # output raw TLD lists (one per line)
  10. # ./scripts/fetch-tlds.sh --toml # output TOML-ready arrays
  11. # ./scripts/fetch-tlds.sh --diff # compare against current Lists.toml
  12. # ./scripts/fetch-tlds.sh --template # generate full Lists.toml with whois overrides if necessary
  13. #
  14. # Note: yeah, this is AI slop — I didn't write it myself (oooo scary). Most of the Rust I did write myself; I just didn't feel like doing this at 4am, and it somewhat works.
  15. set -euo pipefail
  16. SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  17. PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
  18. LISTS_TOML="$PROJECT_DIR/Lists.toml"
  19. CACHE_DIR="$PROJECT_DIR/.tld-cache"
  20. mkdir -p "$CACHE_DIR"
  21. RED='\033[0;31m'
  22. GREEN='\033[0;32m'
  23. YELLOW='\033[1;33m'
  24. CYAN='\033[0;36m'
  25. BOLD='\033[1m'
  26. NC='\033[0m'
  27. # ─── Porkbun ────────────────────────────────────────────────────────────────
  28. fetch_porkbun() {
  29. local cache="$CACHE_DIR/porkbun.json"
  30. local max_age=86400 # 24h cache
  31. if [[ -f "$cache" ]]; then
  32. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  33. if (( age < max_age )); then
  34. echo "$cache"
  35. return 0
  36. fi
  37. fi
  38. echo -e "${CYAN}Fetching Porkbun pricing API...${NC}" >&2
  39. # abusing porkbun public no money needed ah endpoint is no API key is even needed
  40. if curl -sf -X POST "https://api.porkbun.com/api/json/v3/pricing/get" \
  41. -H "Content-Type: application/json" \
  42. -d '{}' \
  43. -o "$cache" 2>/dev/null; then
  44. echo "$cache"
  45. else
  46. echo -e "${RED}Failed to fetch Porkbun data${NC}" >&2
  47. return 1
  48. fi
  49. }
  50. parse_porkbun() {
  51. local json_file="$1"
  52. # Extract TLD keys from the pricing response
  53. # Response format: {"status":"SUCCESS","pricing":{"com":{...},"net":{...},...}}
  54. if command -v jq &>/dev/null; then
  55. jq -r '.pricing // {} | keys[]' "$json_file" 2>/dev/null | sort -u
  56. else
  57. # Fallback: grep for TLD keys (less reliable but works)
  58. grep -o '"[a-z][a-z0-9.-]*":{' "$json_file" | sed 's/"//g; s/:{//' | sort -u
  59. fi
  60. }
  61. # ─── INWX ───────────────────────────────────────────────────────────────────
  62. fetch_inwx() {
  63. local cache="$CACHE_DIR/inwx.html"
  64. local max_age=86400
  65. if [[ -f "$cache" ]]; then
  66. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  67. if (( age < max_age )); then
  68. echo "$cache"
  69. return 0
  70. fi
  71. fi
  72. echo -e "${CYAN}Fetching INWX domain list...${NC}" >&2
  73. # INWX domain check page has TLD list embedded as JSON
  74. if curl -sfL "https://www.inwx.de/en/domain/check" \
  75. -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
  76. -o "$cache" 2>/dev/null; then
  77. echo "$cache"
  78. else
  79. echo -e "${YELLOW}Could not fetch INWX${NC}" >&2
  80. return 1
  81. fi
  82. }
  83. parse_inwx() {
  84. local html_file="$1"
  85. # TLDs are embedded as JSON objects with "tld":"xxx" in the page
  86. grep -oE '"tld":"[a-z]{2,20}"' "$html_file" | sed 's/"tld":"//;s/"//' | sort -u
  87. }
  88. # ─── OVH ────────────────────────────────────────────────────────────────────
  89. fetch_ovh() {
  90. local cache="$CACHE_DIR/ovh.json"
  91. local max_age=86400
  92. if [[ -f "$cache" ]]; then
  93. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  94. if (( age < max_age )); then
  95. echo "$cache"
  96. return 0
  97. fi
  98. fi
  99. echo -e "${CYAN}Fetching OVH domain extensions...${NC}" >&2
  100. if curl -sf "https://www.ovh.com/engine/apiv6/domain/extensions" \
  101. -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
  102. -o "$cache" 2>/dev/null; then
  103. echo "$cache"
  104. else
  105. echo -e "${YELLOW}Could not fetch OVH extensions${NC}" >&2
  106. return 1
  107. fi
  108. }
  109. parse_ovh() {
  110. local json_file="$1"
  111. if command -v jq &>/dev/null; then
  112. # Only top-level TLDs (no dots = not sub-TLDs like com.au)
  113. jq -r '.[]' "$json_file" 2>/dev/null | grep -vE '\.' | sort -u
  114. else
  115. grep -oE '"[a-z]{2,20}"' "$json_file" | tr -d '"' | grep -vE '\.' | sort -u
  116. fi
  117. }
  118. # ─── tld-list.com (comprehensive registry, free basic list) ────────────────
  119. fetch_tldlist() {
  120. local cache="$CACHE_DIR/tldlist-basic.txt"
  121. local max_age=86400
  122. if [[ -f "$cache" ]]; then
  123. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  124. if (( age < max_age )); then
  125. echo "$cache"
  126. return 0
  127. fi
  128. fi
  129. echo -e "${CYAN}Fetching tld-list.com basic list...${NC}" >&2
  130. if curl -sf "https://tld-list.com/df/tld-list-basic.csv" \
  131. -H "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)" \
  132. -o "$cache" 2>/dev/null; then
  133. echo "$cache"
  134. else
  135. echo -e "${YELLOW}Could not fetch tld-list.com${NC}" >&2
  136. return 1
  137. fi
  138. }
  139. parse_tldlist() {
  140. local file="$1"
  141. # One TLD per line, CR/LF endings, includes IDN entries — filter to ASCII only
  142. tr -d '\r' < "$file" | grep -E '^[a-z][a-z0-9]*$' | sort -u
  143. }
  144. # ─── IANA root zone (fallback) ──────────────────────────────────────────────
  145. fetch_iana() {
  146. local cache="$CACHE_DIR/iana-tlds.txt"
  147. local max_age=604800 # 7 days
  148. if [[ -f "$cache" ]]; then
  149. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  150. if (( age < max_age )); then
  151. echo "$cache"
  152. return 0
  153. fi
  154. fi
  155. echo -e "${CYAN}Fetching IANA TLD list...${NC}" >&2
  156. if curl -sf "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" -o "$cache" 2>/dev/null; then
  157. echo "$cache"
  158. else
  159. echo -e "${RED}Failed to fetch IANA list${NC}" >&2
  160. return 1
  161. fi
  162. }
  163. parse_iana() {
  164. local file="$1"
  165. # Skip header line, lowercase everything, filter to 2-3 char ccTLDs
  166. tail -n +2 "$file" | tr '[:upper:]' '[:lower:]' | sort -u
  167. }
  168. parse_iana_cctlds() {
  169. local file="$1"
  170. tail -n +2 "$file" | tr '[:upper:]' '[:lower:]' | grep -E '^[a-z]{2}$' | sort -u
  171. }
  172. # ─── RDAP bootstrap (what actually has lookup servers) ──────────────────────
  173. fetch_rdap() {
  174. local cache="$CACHE_DIR/rdap-dns.json"
  175. local max_age=86400
  176. if [[ -f "$cache" ]]; then
  177. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  178. if (( age < max_age )); then
  179. echo "$cache"
  180. return 0
  181. fi
  182. fi
  183. echo -e "${CYAN}Fetching RDAP bootstrap...${NC}" >&2
  184. if curl -sf "https://data.iana.org/rdap/dns.json" -o "$cache" 2>/dev/null; then
  185. echo "$cache"
  186. else
  187. echo -e "${RED}Failed to fetch RDAP bootstrap${NC}" >&2
  188. return 1
  189. fi
  190. }
  191. parse_rdap_tlds() {
  192. local json_file="$1"
  193. if command -v jq &>/dev/null; then
  194. jq -r '.services[][] | .[]' "$json_file" 2>/dev/null | grep -v '^http' | tr '[:upper:]' '[:lower:]' | sort -u
  195. else
  196. grep -oE '"[a-z]{2,20}"' "$json_file" | tr -d '"' | sort -u
  197. fi
  198. }
  199. # ─── WHOIS server list (rfc1036/whois project) ─────────────────────────────
  200. fetch_whois_servers() {
  201. local cache="$CACHE_DIR/tld_serv_list.txt"
  202. local max_age=604800 # 7 days
  203. if [[ -f "$cache" ]]; then
  204. local age=$(( $(date +%s) - $(stat -f%m "$cache" 2>/dev/null || stat -c%Y "$cache" 2>/dev/null || echo 0) ))
  205. if (( age < max_age )); then
  206. echo "$cache"
  207. return 0
  208. fi
  209. fi
  210. echo -e "${CYAN}Fetching WHOIS server list...${NC}" >&2
  211. if curl -sf "https://raw.githubusercontent.com/rfc1036/whois/next/tld_serv_list" -o "$cache" 2>/dev/null; then
  212. echo "$cache"
  213. else
  214. echo -e "${YELLOW}Could not fetch WHOIS server list${NC}" >&2
  215. return 1
  216. fi
  217. }
  218. # Get the WHOIS server for a given TLD from the cached server list
  219. # Returns empty string if no server found or server is NONE/ARPA/etc
  220. get_whois_server() {
  221. local tld="$1"
  222. local serv_file="$2"
  223. # Format: .tld [optional-tag] server
  224. # Some entries have VERISIGN or similar tag before the server
  225. local line
  226. line=$(grep -E "^\\.${tld}[[:space:]]" "$serv_file" 2>/dev/null | head -1)
  227. if [[ -z "$line" ]]; then
  228. echo ""
  229. return
  230. fi
  231. # Extract server: last word on the line that looks like a hostname
  232. local server
  233. server=$(echo "$line" | awk '{
  234. for (i=NF; i>=2; i--) {
  235. if ($i ~ /^[a-z0-9].*\.[a-z]/) { print $i; exit }
  236. }
  237. }')
  238. # Filter out unusable entries
  239. if [[ "$server" == "NONE" || "$server" == "ARPA" || -z "$server" || "$server" == http* ]]; then
  240. echo ""
  241. else
  242. echo "$server"
  243. fi
  244. }
  245. # Get WHOIS server from IANA directly (slower, single TLD at a time)
  246. get_iana_whois_server() {
  247. local tld="$1"
  248. curl -s "https://www.iana.org/domains/root/db/${tld}.html" 2>/dev/null \
  249. | sed -n 's/.*WHOIS Server:<\/b> *\([^ <]*\).*/\1/p' \
  250. | head -1
  251. }
  252. # ─── Extract current Lists.toml entries ─────────────────────────────────────
  253. parse_current_lists() {
  254. local list_name="${1:-all}"
  255. if [[ ! -f "$LISTS_TOML" ]]; then
  256. echo -e "${RED}No Lists.toml found at $LISTS_TOML${NC}" >&2
  257. return 1
  258. fi
  259. # Extract TLDs from a named list, stripping quotes, colons (whois overrides), commas
  260. awk -v list="$list_name" '
  261. $0 ~ "^"list" *= *\\[" { found=1; next }
  262. found && /^\]/ { exit }
  263. found && /^[[:space:]]*\[/ { exit }
  264. found {
  265. gsub(/["\t,]/, " ")
  266. n = split($0, parts, " ")
  267. for (i=1; i<=n; i++) {
  268. if (parts[i] != "") {
  269. # Strip whois override suffix
  270. sub(/:.*/, "", parts[i])
  271. print parts[i]
  272. }
  273. }
  274. }
  275. ' "$LISTS_TOML" | sort -u
  276. }
  277. # ─── Helpers ────────────────────────────────────────────────────────────────
  278. to_toml_array() {
  279. # Reads TLDs from stdin, outputs TOML array format (wrapped at ~80 chars)
  280. local tlds=()
  281. while IFS= read -r tld; do
  282. [[ -z "$tld" ]] && continue
  283. tlds+=("$tld")
  284. done
  285. local line='\t'
  286. local first=true
  287. for tld in "${tlds[@]}"; do
  288. local entry="\"$tld\""
  289. if $first; then
  290. line+="$entry"
  291. first=false
  292. else
  293. local test_line="$line, $entry"
  294. if (( ${#test_line} > 78 )); then
  295. echo -e "$line,"
  296. line="\t$entry"
  297. else
  298. line+=", $entry"
  299. fi
  300. fi
  301. done
  302. [[ -n "$line" ]] && echo -e "$line,"
  303. }
  304. filter_cctlds() {
  305. grep -E '^[a-z]{2}$'
  306. }
  307. filter_short_tlds() {
  308. # 2-6 char TLDs that are useful for domain hacking
  309. grep -E '^[a-z]{2,6}$'
  310. }
  311. # ─── Known broken/unregistrable TLDs ────────────────────────────────────────
  312. SKIP_TLDS="bl bq eh mf gb bv sj kp hm"
  313. filter_skip() {
  314. local skip_pattern
  315. skip_pattern=$(echo "$SKIP_TLDS" | tr ' ' '|')
  316. grep -vE "^($skip_pattern)$"
  317. }
  318. # ─── Template generation ────────────────────────────────────────────────────
  319. # Generates a full Lists.toml with:
  320. # - "tld" for TLDs with RDAP support (direct lookup works)
  321. # - "tld:whois.server" for TLDs needing WHOIS fallback
  322. # - skip TLDs omitted entirely (no Patch.toml needed)
  323. #
  324. # Uses: Porkbun + OVH + INWX (purchasable), RDAP bootstrap (has server?), WHOIS server list
  325. # With --all-sources: also cross-references tld-list.com
  326. generate_template() {
  327. local all_registrar_tlds="$1"
  328. local rdap_tlds="$2"
  329. local source_summary="$3"
  330. # Fetch WHOIS server list for fallback
  331. local whois_serv_file=""
  332. if whois_serv_file=$(fetch_whois_servers 2>/dev/null); then
  333. true # got it
  334. fi
  335. # The input is already merged + filtered from all registrar sources
  336. local buyable_tlds
  337. buyable_tlds=$(echo "$all_registrar_tlds" | filter_skip | sort -u)
  338. local buyable_count
  339. buyable_count=$(echo "$buyable_tlds" | grep -c . || echo 0)
  340. # Build annotated TLD list: "tld" or "tld:whois_server"
  341. # A TLD needs a whois override if it's NOT in the RDAP bootstrap
  342. local annotated_all=()
  343. local annotated_cc=()
  344. local rdap_hit=0 whois_hit=0 bare_hit=0
  345. while IFS= read -r tld; do
  346. [[ -z "$tld" ]] && continue
  347. local entry=""
  348. if echo "$rdap_tlds" | grep -qx "$tld" 2>/dev/null; then
  349. # Has RDAP server — no override needed
  350. entry="$tld"
  351. ((rdap_hit++)) || true
  352. else
  353. # No RDAP — try to find WHOIS server
  354. local server=""
  355. if [[ -n "$whois_serv_file" ]]; then
  356. server=$(get_whois_server "$tld" "$whois_serv_file")
  357. fi
  358. if [[ -n "$server" ]]; then
  359. entry="${tld}:${server}"
  360. ((whois_hit++)) || true
  361. else
  362. # No known server — include bare, hoardom will try common patterns
  363. entry="$tld"
  364. ((bare_hit++)) || true
  365. fi
  366. fi
  367. annotated_all+=("$entry")
  368. # Also track ccTLDs (2-letter entries)
  369. local base_tld="${tld%%:*}"
  370. if [[ "$base_tld" =~ ^[a-z]{2}$ ]]; then
  371. annotated_cc+=("$entry")
  372. fi
  373. done <<< "$buyable_tlds"
  374. echo -e "${CYAN}Building template...${NC}" >&2
  375. echo -e " ${GREEN}${rdap_hit}${NC} TLDs with RDAP (direct lookup)" >&2
  376. echo -e " ${YELLOW}${whois_hit}${NC} TLDs with WHOIS override" >&2
  377. echo -e " ${RED}${bare_hit}${NC} TLDs with no known server (will probe)" >&2
  378. echo "" >&2
  379. # ── Curated lists (bare TLD names, annotated automatically) ─────────
  380. # Standard: com, net, org + generally desirable / well-known TLDs
  381. local standard_tlds=(
  382. "com" "net" "org" "io" "co" "dev" "app" "me" "info"
  383. "biz" "one" "xyz" "online" "site" "tech" "pro" "tv"
  384. "cc" "to" "sh" "li" "fm" "am" "gg" "ws" "la"
  385. "ms" "nu" "cx" "mn" "st" "tel" "ai" "id" "in"
  386. "it" "is" "at" "be" "de" "eu" "fr" "nl" "se"
  387. "uk" "us" "ca" "au" "nz" "club" "blog" "art" "fun"
  388. "lol" "wtf" "page" "link" "space" "store" "shop"
  389. )
  390. # Decent: the best of the best — com, net, org, io + short desirable ones
  391. # that work great for domain hacking and are punchy
  392. local decent_tlds=(
  393. "com" "net" "org" "io" "dev" "app" "co" "me"
  394. "ai" "sh" "to" "fm" "tv" "gg" "cc" "li" "am"
  395. "la" "nu" "id" "in" "it" "is" "at" "ws"
  396. "one" "pro" "bio" "art" "ink" "run" "win" "new"
  397. "lol" "pub" "fun" "vet" "fit" "rip" "wtf" "zip"
  398. )
  399. # Swiss: standard-like but with Swiss / Central European related TLDs up front
  400. local swiss_tlds=(
  401. "com" "net" "org" "ch" "li" "swiss" "zuerich"
  402. "io" "co" "dev" "app" "me" "info" "one" "pro"
  403. "de" "at" "fr" "it" "eu"
  404. "tech" "online" "site" "shop" "store"
  405. "biz" "xyz" "tv" "cc" "to" "sh" "fm" "am" "gg"
  406. )
  407. # Annotate curated lists with whois overrides where needed
  408. annotate_list() {
  409. local -n input_list=$1
  410. local result=()
  411. for bare_tld in "${input_list[@]}"; do
  412. local found=false
  413. for ann in "${annotated_all[@]}"; do
  414. local ann_base="${ann%%:*}"
  415. if [[ "$ann_base" == "$bare_tld" ]]; then
  416. result+=("$ann")
  417. found=true
  418. break
  419. fi
  420. done
  421. if ! $found; then
  422. result+=("$bare_tld")
  423. fi
  424. done
  425. printf '%s\n' "${result[@]}"
  426. }
  427. # Length-based filtered lists from annotated_all
  428. filter_annotated_by_length() {
  429. local min="$1"
  430. local max="$2"
  431. for ann in "${annotated_all[@]}"; do
  432. local base="${ann%%:*}"
  433. local len=${#base}
  434. if (( len >= min && len <= max )); then
  435. echo "$ann"
  436. fi
  437. done
  438. }
  439. # ─── Output ─────────────────────────────────────────────────────────
  440. local date_str
  441. date_str=$(date +%Y-%m-%d)
  442. cat <<HEADER
  443. # Lists.toml — Built-in TLD lists for hoardom
  444. # Auto-generated on ${date_str} from ${source_summary}
  445. #
  446. # Format:
  447. # "tld" — TLD has RDAP support, lookup works directly
  448. # "tld:whois.server" — No RDAP: use this WHOIS server for fallback
  449. #
  450. # ${buyable_count} purchasable TLDs (handshake/sub-TLDs excluded)
  451. # ${rdap_hit} have RDAP, ${whois_hit} need WHOIS override, ${bare_hit} will auto-probe
  452. #
  453. # Lists:
  454. # standard — common desirable TLDs (com, net, org, io, dev, ...)
  455. # decent — very best short punchy TLDs for domain hacking
  456. # swiss — standard-like but with Swiss/Central European TLDs prioritized
  457. # country — all 2-letter country-code TLDs
  458. # two — all 2-letter TLDs
  459. # three — all TLDs with 3 or fewer letters
  460. # four — all TLDs with exactly 4 letters
  461. # long — all TLDs with 5+ letters
  462. # all — everything
  463. HEADER
  464. echo "standard = ["
  465. annotate_list standard_tlds | to_toml_array
  466. echo "]"
  467. echo ""
  468. echo "decent = ["
  469. annotate_list decent_tlds | to_toml_array
  470. echo "]"
  471. echo ""
  472. echo "swiss = ["
  473. annotate_list swiss_tlds | to_toml_array
  474. echo "]"
  475. echo ""
  476. echo "country = ["
  477. printf '%s\n' "${annotated_cc[@]}" | to_toml_array
  478. echo "]"
  479. echo ""
  480. echo "two = ["
  481. filter_annotated_by_length 2 2 | to_toml_array
  482. echo "]"
  483. echo ""
  484. echo "three = ["
  485. filter_annotated_by_length 2 3 | to_toml_array
  486. echo "]"
  487. echo ""
  488. echo "four = ["
  489. filter_annotated_by_length 4 4 | to_toml_array
  490. echo "]"
  491. echo ""
  492. echo "long = ["
  493. filter_annotated_by_length 5 99 | to_toml_array
  494. echo "]"
  495. echo ""
  496. echo "all = ["
  497. printf '%s\n' "${annotated_all[@]}" | to_toml_array
  498. echo "]"
  499. }
  500. # ─── Main ───────────────────────────────────────────────────────────────────
  501. main() {
  502. local mode="summary"
  503. local source="all"
  504. local all_sources=false
  505. for arg in "$@"; do
  506. case "$arg" in
  507. --raw) mode="raw" ;;
  508. --toml) mode="toml" ;;
  509. --diff) mode="diff" ;;
  510. --template) mode="template" ;;
  511. --all-sources) all_sources=true ;;
  512. porkbun) source="porkbun" ;;
  513. inwx) source="inwx" ;;
  514. ovh) source="ovh" ;;
  515. iana) source="iana" ;;
  516. rdap) source="rdap" ;;
  517. tldlist) source="tldlist" ;;
  518. --help|-h)
  519. echo "Usage: $0 [source] [--raw|--toml|--diff|--template] [--all-sources]"
  520. echo ""
  521. echo "Sources: porkbun, ovh, inwx, iana, rdap, tldlist"
  522. echo ""
  523. echo "Flags:"
  524. echo " --raw Output raw TLD list (one per line)"
  525. echo " --toml Output TOML-ready arrays"
  526. echo " --diff Compare against current Lists.toml"
  527. echo " --template Generate full Lists.toml with whois overrides"
  528. echo " --all-sources Include tld-list.com for extra coverage (used as"
  529. echo " a filter: only TLDs also in a registrar are kept)"
  530. exit 0 ;;
  531. esac
  532. done
  533. local porkbun_tlds="" inwx_tlds="" ovh_tlds="" iana_tlds="" rdap_tlds="" tldlist_tlds=""
  534. local porkbun_count=0 inwx_count=0 ovh_count=0 iana_count=0 rdap_count=0 tldlist_count=0
  535. # Template mode needs all registrar sources + rdap regardless of source filter
  536. if [[ "$mode" == "template" ]]; then
  537. source="all"
  538. fi
  539. # ── Fetch from selected sources ──
  540. if [[ "$source" == "all" || "$source" == "porkbun" ]]; then
  541. if porkbun_file=$(fetch_porkbun); then
  542. porkbun_tlds=$(parse_porkbun "$porkbun_file")
  543. porkbun_count=$(echo "$porkbun_tlds" | grep -c . || true)
  544. fi
  545. fi
  546. if [[ "$source" == "all" || "$source" == "ovh" ]]; then
  547. if ovh_file=$(fetch_ovh); then
  548. ovh_tlds=$(parse_ovh "$ovh_file")
  549. ovh_count=$(echo "$ovh_tlds" | grep -c . || true)
  550. fi
  551. fi
  552. if [[ "$source" == "all" || "$source" == "inwx" ]]; then
  553. if inwx_file=$(fetch_inwx 2>/dev/null); then
  554. inwx_tlds=$(parse_inwx "$inwx_file")
  555. inwx_count=$(echo "$inwx_tlds" | grep -c . || true)
  556. fi
  557. fi
  558. if [[ "$source" == "all" || "$source" == "iana" ]]; then
  559. if iana_file=$(fetch_iana); then
  560. iana_tlds=$(parse_iana "$iana_file")
  561. iana_count=$(echo "$iana_tlds" | grep -c . || true)
  562. fi
  563. fi
  564. if [[ "$source" == "all" || "$source" == "rdap" ]]; then
  565. if rdap_file=$(fetch_rdap); then
  566. rdap_tlds=$(parse_rdap_tlds "$rdap_file")
  567. rdap_count=$(echo "$rdap_tlds" | grep -c . || true)
  568. fi
  569. fi
  570. if [[ "$all_sources" == true || "$source" == "tldlist" ]]; then
  571. if tldlist_file=$(fetch_tldlist); then
  572. tldlist_tlds=$(parse_tldlist "$tldlist_file")
  573. tldlist_count=$(echo "$tldlist_tlds" | grep -c . || true)
  574. fi
  575. fi
  576. # ── Filter porkbun: no handshake, no sub-TLDs ──
  577. local porkbun_filtered=""
  578. if [[ -n "$porkbun_tlds" ]]; then
  579. local porkbun_file="$CACHE_DIR/porkbun.json"
  580. if command -v jq &>/dev/null && [[ -f "$porkbun_file" ]]; then
  581. porkbun_filtered=$(jq -r '
  582. .pricing // {} | to_entries[] |
  583. select(.key | contains(".") | not) |
  584. select(.value.specialType // "" | test("handshake") | not) |
  585. .key
  586. ' "$porkbun_file" 2>/dev/null | sort -u)
  587. else
  588. porkbun_filtered=$(echo "$porkbun_tlds" | grep -v '\.' | sort -u)
  589. fi
  590. fi
  591. # ── Merge all registrar-confirmed purchasable TLDs ──
  592. # Only TLDs that have pricing at a real registrar are included
  593. local registrar_tlds
  594. registrar_tlds=$(echo -e "${porkbun_filtered}\n${ovh_tlds}\n${inwx_tlds}" | grep -E '^[a-z]' | sort -u | filter_skip)
  595. # If --all-sources, also include tld-list.com TLDs that appear in at least
  596. # one registrar (cross-reference = purchasable + known to community list)
  597. if [[ "$all_sources" == true && -n "$tldlist_tlds" ]]; then
  598. # tld-list.com entries that are ALSO in a registrar = confirmed purchasable
  599. local tldlist_confirmed
  600. tldlist_confirmed=$(comm -12 <(echo "$tldlist_tlds") <(echo "$registrar_tlds") 2>/dev/null || true)
  601. # They're already in registrar_tlds, so this just validates.
  602. # More useful: tld-list entries NOT in any registrar = brand/reserved (skip them)
  603. local tldlist_extra
  604. tldlist_extra=$(comm -23 <(echo "$tldlist_tlds") <(echo "$registrar_tlds") 2>/dev/null || true)
  605. local extra_count
  606. extra_count=$(echo "$tldlist_extra" | grep -c . || echo 0)
  607. echo -e " ${YELLOW}tld-list.com:${NC} $extra_count TLDs with no registrar pricing (brand/reserved, excluded)" >&2
  608. fi
  609. local all_tlds="$registrar_tlds"
  610. local all_cctlds
  611. all_cctlds=$(echo "$all_tlds" | filter_cctlds)
  612. # Build source summary string for template header
  613. local sources_used=()
  614. [[ $porkbun_count -gt 0 ]] && sources_used+=("Porkbun")
  615. [[ $ovh_count -gt 0 ]] && sources_used+=("OVH")
  616. [[ $inwx_count -gt 0 ]] && sources_used+=("INWX")
  617. local source_summary
  618. local joined
  619. joined=$(printf " + %s" "${sources_used[@]}")
  620. joined="${joined:3}" # strip leading " + "
  621. source_summary="${joined} + RDAP bootstrap + WHOIS server list"
  622. case "$mode" in
  623. raw)
  624. echo "$all_tlds"
  625. ;;
  626. toml)
  627. echo -e "${BOLD}# Purchasable TLDs from all registrars ($(echo "$all_tlds" | wc -l | tr -d ' ') total)${NC}"
  628. echo "all_registrars = ["
  629. echo "$all_tlds" | to_toml_array
  630. echo "]"
  631. echo ""
  632. echo "# Country-code TLDs (purchasable)"
  633. echo "cctlds = ["
  634. echo "$all_cctlds" | to_toml_array
  635. echo "]"
  636. ;;
  637. diff)
  638. echo -e "${BOLD}Comparing registrar data vs current Lists.toml${NC}"
  639. echo ""
  640. local current_all current_country
  641. current_all=$(parse_current_lists "all")
  642. current_country=$(parse_current_lists "country")
  643. # TLDs in registrars but NOT in our 'all' list
  644. if [[ -n "$all_tlds" ]]; then
  645. local missing_from_all
  646. missing_from_all=$(comm -23 <(echo "$all_tlds" | filter_short_tlds | sort) <(echo "$current_all" | sort) 2>/dev/null || true)
  647. if [[ -n "$missing_from_all" ]]; then
  648. local mc
  649. mc=$(echo "$missing_from_all" | wc -l | tr -d ' ')
  650. echo -e "${YELLOW}TLDs at registrars but NOT in our 'all' list ($mc):${NC}"
  651. echo "$missing_from_all" | tr '\n' ' '
  652. echo ""
  653. echo ""
  654. fi
  655. # ccTLDs at registrars but NOT in our 'country' list
  656. local missing_cc
  657. missing_cc=$(comm -23 <(echo "$all_cctlds" | sort) <(echo "$current_country" | sort) 2>/dev/null || true)
  658. if [[ -n "$missing_cc" ]]; then
  659. local mcc
  660. mcc=$(echo "$missing_cc" | wc -l | tr -d ' ')
  661. echo -e "${YELLOW}ccTLDs at registrars but NOT in 'country' list ($mcc):${NC}"
  662. echo "$missing_cc" | tr '\n' ' '
  663. echo ""
  664. echo ""
  665. fi
  666. # TLDs in our 'all' list but NOT at any registrar
  667. local extra
  668. extra=$(comm -13 <(echo "$all_tlds" | sort) <(echo "$current_all" | sort) 2>/dev/null || true)
  669. if [[ -n "$extra" ]]; then
  670. local ec
  671. ec=$(echo "$extra" | wc -l | tr -d ' ')
  672. echo -e "${CYAN}TLDs in our 'all' list but NOT at any registrar ($ec):${NC}"
  673. echo "$extra" | tr '\n' ' '
  674. echo ""
  675. echo ""
  676. fi
  677. fi
  678. # Check which of our TLDs have RDAP servers
  679. if [[ -n "$rdap_tlds" && -n "$current_all" ]]; then
  680. local no_rdap
  681. no_rdap=$(comm -23 <(echo "$current_all" | sort) <(echo "$rdap_tlds" | sort) 2>/dev/null || true)
  682. if [[ -n "$no_rdap" ]]; then
  683. local nrc
  684. nrc=$(echo "$no_rdap" | wc -l | tr -d ' ')
  685. echo -e "${RED}TLDs in our lists with NO RDAP server ($nrc) — need WHOIS fallback:${NC}"
  686. echo "$no_rdap" | tr '\n' ' '
  687. echo ""
  688. fi
  689. fi
  690. ;;
  691. template)
  692. generate_template "$registrar_tlds" "$rdap_tlds" "$source_summary"
  693. ;;
  694. summary)
  695. echo -e "${BOLD}═══ TLD Source Summary ═══${NC}"
  696. echo ""
  697. [[ $porkbun_count -gt 0 ]] && echo -e " ${GREEN}Porkbun${NC} $(echo "$porkbun_filtered" | grep -c . || echo 0) TLDs ($(echo "$porkbun_filtered" | filter_cctlds | wc -l | tr -d ' ') ccTLDs)"
  698. [[ $ovh_count -gt 0 ]] && echo -e " ${GREEN}OVH${NC} $ovh_count TLDs ($(echo "$ovh_tlds" | filter_cctlds | wc -l | tr -d ' ') ccTLDs)"
  699. [[ $inwx_count -gt 0 ]] && echo -e " ${GREEN}INWX${NC} $inwx_count TLDs ($(echo "$inwx_tlds" | filter_cctlds | wc -l | tr -d ' ') ccTLDs)"
  700. [[ $tldlist_count -gt 0 ]] && echo -e " ${GREEN}tld-list.com${NC} $tldlist_count TLDs (community registry, no pricing)"
  701. [[ $iana_count -gt 0 ]] && echo -e " ${GREEN}IANA${NC} $iana_count TLDs"
  702. [[ $rdap_count -gt 0 ]] && echo -e " ${GREEN}RDAP${NC} $rdap_count TLDs with lookup servers"
  703. echo ""
  704. # Show what each registrar uniquely contributes
  705. if [[ $porkbun_count -gt 0 && $ovh_count -gt 0 ]]; then
  706. local ovh_unique inwx_unique
  707. ovh_unique=$(comm -23 <(echo "$ovh_tlds" | sort) <(echo "$porkbun_filtered" | sort) | wc -l | tr -d ' ')
  708. echo -e " ${CYAN}OVH adds${NC} $ovh_unique TLDs not on Porkbun"
  709. if [[ $inwx_count -gt 0 ]]; then
  710. inwx_unique=$(comm -23 <(echo "$inwx_tlds" | sort) <(echo -e "${porkbun_filtered}\n${ovh_tlds}" | sort -u) | wc -l | tr -d ' ')
  711. echo -e " ${CYAN}INWX adds${NC} $inwx_unique TLDs not on Porkbun/OVH"
  712. fi
  713. echo ""
  714. fi
  715. echo -e " ${BOLD}Merged purchasable:${NC} $(echo "$all_tlds" | wc -l | tr -d ' ') TLDs"
  716. echo -e " ${BOLD}Merged ccTLDs:${NC} $(echo "$all_cctlds" | wc -l | tr -d ' ')"
  717. echo ""
  718. echo -e " Cached data in: ${CYAN}$CACHE_DIR${NC}"
  719. echo -e " Use ${BOLD}--diff${NC} to compare against Lists.toml"
  720. echo -e " Use ${BOLD}--toml${NC} to output TOML-ready arrays"
  721. echo -e " Use ${BOLD}--template${NC} to generate template Lists.toml"
  722. echo -e " Use ${BOLD}--all-sources${NC} to also fetch tld-list.com"
  723. echo -e " Use ${BOLD}--raw${NC} for raw TLD list (one per line)"
  724. ;;
  725. esac
  726. }
  727. main "$@"