#!/usr/bin/env python3
"""Check all curated TLDs in violator.conf against Lists.toml all section.

Reads the ``all = [...]`` array from Lists.toml as the reference set of TLDs,
then scans every ``[list.<name>]`` section of violator.conf whose type is
``curated`` and reports each word in its ``tlds`` value that is not in the
reference set.
"""
import re

LISTS_TOML_PATH = 'scripts/violator-workdir/Lists.toml'
VIOLATOR_CONF_PATH = 'scripts/violator.conf'

# One TLD label: starts with a letter, may contain hyphens in the middle so
# punycode labels such as "xn--p1ai" are kept whole instead of being split.
_TLD_PATTERN = r'[a-z](?:[a-z0-9-]*[a-z0-9])?'

_ALL_ARRAY_RE = re.compile(r'^all\s*=\s*\[(.*?)\]', re.DOTALL | re.MULTILINE)
_QUOTED_TLD_RE = re.compile(r'"(' + _TLD_PATTERN + r')')
_WORD_RE = re.compile(_TLD_PATTERN)

# A new section starts at every "[" that opens a line.
_SECTION_BREAK_RE = re.compile(r'\n(?=\[)')
# Applied to ONE section at a time, so the lazy ".*?" gaps cannot run past the
# end of a non-curated section into the next section's "type"/"tlds" keys.
_CURATED_SECTION_RE = re.compile(
    r'\[list\.(\w+)\].*?type\s*=\s*curated.*?tlds\s*=\s*(.*)',
    re.DOTALL,
)

# Config keywords that may show up in the captured text and are never
# reported as TLDs.
_KEYWORDS = frozenset(
    {'type', 'curated', 'description', 'tlds', 'min', 'max', 'filter', 'all'}
)


def parse_all_tlds(toml_text):
    """Return the set of TLDs listed in the ``all = [...]`` array.

    Args:
        toml_text: Full text of Lists.toml.

    Returns:
        A set of lowercase TLD strings.

    Raises:
        ValueError: If the text contains no ``all = [...]`` array.
    """
    match = _ALL_ARRAY_RE.search(toml_text)
    if match is None:
        raise ValueError("no 'all = [...]' array found in Lists.toml text")
    return set(_QUOTED_TLD_RE.findall(match.group(1)))


def find_curated_sections(conf_text):
    """Return ``(name, tlds_block)`` for every curated ``[list.<name>]`` section.

    The text is first cut at each line that begins with ``[`` so that every
    section is matched in isolation.

    Args:
        conf_text: Full text of violator.conf.

    Returns:
        A list of ``(section_name, raw_text_after_"tlds =")`` tuples, in file
        order.
    """
    sections = []
    for chunk in _SECTION_BREAK_RE.split(conf_text):
        match = _CURATED_SECTION_RE.search(chunk)
        if match is not None:
            sections.append((match.group(1), match.group(2)))
    return sections


def find_bogus_tlds(sections, all_tlds):
    """Map each section name to the words in its tlds block not in *all_tlds*.

    Args:
        sections: Iterable of ``(name, tlds_block)`` pairs.
        all_tlds: Set of known TLDs.

    Returns:
        A dict ``{name: [unknown_word, ...]}``; sections with no unknown words
        are omitted. Words keep their order of appearance (duplicates kept).
    """
    bogus = {}
    for name, tlds_block in sections:
        for word in _WORD_RE.findall(tlds_block):
            if word not in all_tlds and word not in _KEYWORDS:
                bogus.setdefault(name, []).append(word)
    return bogus


def main():
    """Read both files and print the report to stdout."""
    # Parse all TLDs from Lists.toml
    with open(LISTS_TOML_PATH, encoding='utf-8') as f:
        all_tlds = parse_all_tlds(f.read())

    # Parse violator.conf
    with open(VIOLATOR_CONF_PATH, encoding='utf-8') as f:
        sections = find_curated_sections(f.read())

    bogus = find_bogus_tlds(sections, all_tlds)
    if not bogus:
        print("All curated TLDs exist in the all list!")
        return

    total = 0
    for name, bads in bogus.items():
        print(f"\n[list.{name}] — {len(bads)} fake TLDs:")
        print(" " + " ".join(bads))
        total += len(bads)
    print(f"\nTotal: {total} TLDs to remove")


if __name__ == "__main__":
    main()