rdb-to-sheets.py

import re
import sys

import pandas as pd

# Show usage help when the script is called without arguments.
if len(sys.argv) < 2:
    print("Usage: python rdb-to-sheets.py <input_file> [output_file]")
    print("Example: python rdb-to-sheets.py RDBv2.txt RDB.xlsx")
    print("If no output file is given, RDB.xlsx is used by default; make sure a file with that name does not already exist.")
    sys.exit(1)

# Input and output paths.
input_file = sys.argv[1]
output_path = sys.argv[2] if len(sys.argv) > 2 else "RDB.xlsx"  # Default output name if none is given

with open(input_file, "r") as file:
    lines = file.readlines()
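
# Illustrative excerpt of the expected input format (assumed example, not taken
# from a real printout; the field layout is inferred from the regexes in
# parse_rdb_data below):
#
#   DN    4001
#     CPND
#       NAME  Front Desk
#     TYPE  SL1
#     TN    096 0 00 01  KEY 00  MARP  DES 4001   24 JAN 2020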

# Parse the raw RDB printout into structured entries.
def parse_rdb_data(lines):
    """Turn raw printout lines into a list of dicts, one per DN block."""
    entries = []
    current_entry = {}
    current_tns = []
    for line in lines:
        line = line.strip()
        if not line:
            continue
        # A new DN block starts here.
        if line.startswith("DN"):
            # Save the previous entry before starting a new one.
            if current_entry:
                if current_tns:
                    current_entry["TNs"] = current_tns
                entries.append(current_entry)
                current_tns = []
            current_entry = {"DN": line.split()[1]}
        elif "CPND" in line:
            current_entry["NAME"] = None
        elif "NAME" in line and current_entry.get("NAME") is None:
            current_entry["NAME"] = line.replace("NAME", "").strip()
        elif line.startswith("TYPE"):
            current_entry["TYPE"] = line.split()[1]
        elif line.startswith("ROUT"):
            current_entry["ROUT"] = line.split()[1]
        elif line.startswith("STCD"):
            current_entry["STCD"] = line.split()[1]
        elif line.startswith("FEAT"):
            current_entry["FEAT"] = " ".join(line.split()[1:])
        elif line.startswith("TN"):
            tn_match = re.search(r"(\d{3} \d \d{2} \d{2})", line)
            if tn_match:
                tn_value = tn_match.group(1)
                tn_entry = {"TN_1_TN": tn_value}
                key_match = re.search(r"KEY (\d{2})", line)
                if key_match:
                    tn_entry["KEY"] = key_match.group(1)
                des_match = re.search(r"DES\s+(\S+)", line)
                if des_match:
                    tn_entry["DES"] = des_match.group(1)
                date_match = re.search(r"(\d{1,2} \w{3} \d{4})", line)
                if date_match:
                    tn_entry["DATE"] = date_match.group(0)
                tn_entry["MARP"] = "YES" if "MARP" in line else "NO"
                current_tns.append(tn_entry)
    # Save the final entry once all lines have been read.
    if current_entry:
        if current_tns:
            current_entry["TNs"] = current_tns
        entries.append(current_entry)
    return entries


parsed_entries = parse_rdb_data(lines)
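
# Each parsed entry is a flat dict plus an optional nested "TNs" list,
# for example (illustrative values only):
#   {"DN": "4001", "NAME": "Front Desk", "TYPE": "SL1",
#    "TNs": [{"TN_1_TN": "096 0 00 01", "KEY": "00", "DES": "4001",
#             "DATE": "24 JAN 2020", "MARP": "YES"}]}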

# Split the parsed entries into categories, one per output sheet.
def categorize_entries(entries):
    main_entries = []
    multiple_tns_entries = []
    route_entries = []
    cdp_entries = []
    feature_code_entries = []
    att_ldn_entries = []
    other_entries = []
    unexpected_data = []
    for entry in entries:
        entry_type = entry.get("TYPE")
        if entry_type in ["SL1", "500"]:
            # Flatten each TN into its own row; DNs with several TNs go to a separate sheet.
            if "TNs" in entry and len(entry["TNs"]) > 1:
                for tn in entry["TNs"]:
                    multi_entry = entry.copy()
                    multi_entry.update(tn)
                    multiple_tns_entries.append(multi_entry)
            elif "TNs" in entry:
                for tn in entry["TNs"]:
                    main_entry = entry.copy()
                    main_entry.update(tn)
                    main_entries.append(main_entry)
            else:
                main_entries.append(entry)
        elif entry_type in ["ATT", "LDN"]:
            if "TNs" in entry:
                for tn in entry["TNs"]:
                    att_ldn_entry = entry.copy()
                    att_ldn_entry.update(tn)
                    att_ldn_entries.append(att_ldn_entry)
            else:
                att_ldn_entries.append(entry)
        elif entry_type == "RDB":
            route_entries.append(entry)
        elif entry_type == "CDP":
            cdp_entries.append({
                "DN": entry.get("DN", ""),
                "TYPE": entry.get("TYPE", ""),
                "STCD": entry.get("STCD", "DSC"),
            })
        elif entry_type == "FFC":
            feature_code_entries.append(entry)
        elif entry_type is None:
            unexpected_data.append(entry)
        else:
            other_entries.append(entry)
    unexpected_sheet_name = "Unexpected Data" if unexpected_data else "No Unexpected Data"
    return (main_entries, multiple_tns_entries, route_entries, cdp_entries,
            feature_code_entries, att_ldn_entries, other_entries,
            unexpected_data, unexpected_sheet_name)


# Categorize the parsed entries for export.
(main_entries, multiple_tns_entries, route_entries, cdp_entries,
 feature_code_entries, att_ldn_entries, other_entries,
 unexpected_data, unexpected_sheet_name) = categorize_entries(parsed_entries)

# Write each category to its own sheet, dropping the nested "TNs" column
# since its contents have already been flattened into per-TN rows above.
with pd.ExcelWriter(output_path) as writer:
    for sheet_name, data in {
        "Main": main_entries,
        "Multiple TNs": multiple_tns_entries,
        "Routes": route_entries,
        "CDP": cdp_entries,
        "Feature Codes": feature_code_entries,
        "ATT_LDN": att_ldn_entries,
        "Other": other_entries,
        unexpected_sheet_name: unexpected_data,
    }.items():
        df = pd.DataFrame(data)
        if "TNs" in df.columns:
            df = df.drop(columns=["TNs"])  # Drop the nested list column; its data is already in the flattened rows
        df.to_excel(writer, sheet_name=sheet_name, index=False)

print("Processing complete. Output saved to", output_path)