#!/usr/bin/env python3
"""Check all curated TLDs in violator.conf against Lists.toml all section."""
import re

LISTS_TOML_PATH = 'scripts/violator-workdir/Lists.toml'
VIOLATOR_CONF_PATH = 'scripts/violator.conf'

# Bare words that are config syntax rather than TLDs; they are never reported.
CONFIG_KEYWORDS = frozenset(
    {'type', 'curated', 'description', 'tlds', 'min', 'max', 'filter', 'all'}
)

# A TLD-like token: starts with a letter, may contain interior hyphens
# (so labels such as "xn--p1ai" stay in one piece), never ends with a hyphen.
_LABEL = r'[a-z](?:[a-z0-9-]*[a-z0-9])?'

ALL_ARRAY_RE = re.compile(r'^all = \[(.*?)\]', re.DOTALL | re.MULTILINE)
QUOTED_TLD_RE = re.compile(r'"(' + _LABEL + r')')
# One "[list.NAME]" header plus its body, which ends at the next line that
# starts with "[" (or at end of file).
LIST_SECTION_RE = re.compile(r'\[list\.(\w+)\](.*?)(?=\n\[|\Z)', re.DOTALL)
CURATED_TYPE_RE = re.compile(r'type\s*=\s*curated')
TLDS_VALUE_RE = re.compile(r'tlds\s*=\s*(.*)', re.DOTALL)
WORD_RE = re.compile(_LABEL)


def parse_all_tlds(toml_text: str) -> set:
    """Return the set of TLDs listed in the ``all = [...]`` array.

    Args:
        toml_text: Full text of Lists.toml.

    Raises:
        ValueError: If no line starting with ``all = [`` is present.
    """
    match = ALL_ARRAY_RE.search(toml_text)
    if match is None:
        raise ValueError("no 'all = [...]' array found in Lists.toml")
    return set(QUOTED_TLD_RE.findall(match.group(1)))


def find_bogus_tlds(conf_text: str, all_tlds) -> dict:
    """Map each curated list name to the words in its tlds value not in *all_tlds*.

    Each ``[list.NAME]`` section is examined on its own, so a section that is
    not curated (or has no ``tlds`` key) can never pick up the keys of a later
    section and be blamed for that section's entries.

    Args:
        conf_text: Full text of violator.conf.
        all_tlds: Container of known-good TLD strings.

    Returns:
        ``{list_name: [unknown_word, ...]}`` in file order; lists with no
        unknown words are omitted. Repeated words are reported each time.
    """
    bogus = {}
    for section in LIST_SECTION_RE.finditer(conf_text):
        name, body = section.groups()
        if CURATED_TYPE_RE.search(body) is None:
            continue
        tlds_match = TLDS_VALUE_RE.search(body)
        if tlds_match is None:
            continue
        # Everything after "tlds =" up to the end of the section is scanned,
        # so other keys in that span are filtered out via CONFIG_KEYWORDS.
        unknown = [
            word
            for word in WORD_RE.findall(tlds_match.group(1))
            if word not in all_tlds and word not in CONFIG_KEYWORDS
        ]
        if unknown:
            bogus.setdefault(name, []).extend(unknown)
    return bogus


def format_report(bogus: dict) -> str:
    """Return the report text for *bogus* (without the trailing newline)."""
    if not bogus:
        return "All curated TLDs exist in the all list!"
    lines = []
    total = 0
    for name, bads in bogus.items():
        lines.append(f"\n[list.{name}] — {len(bads)} fake TLDs:")
        lines.append(" " + " ".join(bads))
        total += len(bads)
    lines.append(f"\nTotal: {total} TLDs to remove")
    return "\n".join(lines)


def main(toml_path: str = LISTS_TOML_PATH, conf_path: str = VIOLATOR_CONF_PATH) -> None:
    """Read both files, compare the curated lists and print the report."""
    with open(toml_path, encoding='utf-8') as f:
        toml_text = f.read()
    with open(conf_path, encoding='utf-8') as f:
        conf_text = f.read()
    try:
        all_tlds = parse_all_tlds(toml_text)
    except ValueError as err:
        raise SystemExit(f"error: {err}") from err
    print(format_report(find_bogus_tlds(conf_text, all_tlds)))


if __name__ == '__main__':
    main()