"""Restore full-precision observations in a survey CSV from a JXL (XML) file.

The CSV holds station header rows (first column only) followed by
observation rows ``target;Hz;Z;SD``.  For every observation whose CSV value
is a rounded representation of the corresponding JXL value, the JXL value
(with all its decimals) is written instead.  Instrument heights (``IH:``)
and target heights (``:ZH:``) found in the JXL are attached to the output.
"""

import csv
import xml.etree.ElementTree as ET
from decimal import Decimal, InvalidOperation, getcontext, ROUND_HALF_UP

# ========= Settings =========
JXL_IN = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campusnetz_bereinigt_plus_nachmessung.jxl"
CSV_IN = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campsnetz_beobachtungen_plus_nachmessungen.csv"
CSV_OUT = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campsnetz_beobachtungen_plus_nachmessungen_korrigiert.csv"

# Generous working precision so that unit conversion never limits the
# number of decimals that can be reproduced in the output.
getcontext().prec = 70

# Marker that separates the distance from an attached target height.
ZH_MARKER = ":ZH:"

# Maximum number of entries listed per section of the final report.
REPORT_LIMIT = 50


# ========= Helper functions =========
def _strip_zh(s: str) -> str:
    """Return *s* without a trailing ``:ZH:<height>`` annotation."""
    return s.split(ZH_MARKER, 1)[0].strip()


def _quantum(decimals: int) -> Decimal:
    """Return the ``quantize`` exponent pattern for *decimals* places."""
    return Decimal("1") if decimals == 0 else Decimal("1." + ("0" * decimals))


def count_decimals(s: str, sep: str) -> int:
    """Count the digits after the decimal separator *sep* in *s*.

    A ``:ZH:...`` annotation is ignored.  ``None``, empty strings and
    values without a separator yield 0.
    """
    if s is None:
        return 0
    s = s.strip()
    if s == "":
        return 0
    s = _strip_zh(s)
    if sep in s:
        return len(s.split(sep, 1)[1])
    return 0


def fmt_decimal_fixed(x: Decimal, decimals: int, sep: str) -> str:
    """Format *x* with exactly *decimals* places (half-up) using *sep*."""
    rounded = x.quantize(_quantum(decimals), rounding=ROUND_HALF_UP)
    txt = format(rounded, "f")
    if sep != ".":
        txt = txt.replace(".", sep)
    if decimals == 0:
        txt = txt.split(sep)[0]
    return txt


def parse_decimal_csv(s: str) -> Decimal:
    """Parse a comma-decimal CSV number, ignoring a ``:ZH:...`` suffix.

    Raises:
        decimal.InvalidOperation: if the remaining text is not a number.
    """
    s = _strip_zh((s or "").strip())
    return Decimal(s.replace(",", "."))


def parse_decimal_comma(s: str) -> Decimal:
    """Parse a comma-decimal string into a ``Decimal``.

    Raises:
        decimal.InvalidOperation: if the text is not a number.
    """
    return Decimal((s or "").strip().replace(",", "."))


def deg_to_gon_str(deg_str: str) -> str:
    """Convert a JXL angle in degrees (decimal point) to a gon string.

    gon = deg * 10 / 9.  The result carries exactly as many decimals as the
    degree value and uses a comma as decimal separator.
    """
    deg_str = (deg_str or "").strip()
    decimals = count_decimals(deg_str, ".")
    # Multiply first: the product is exact, so only the division rounds.
    gon = Decimal(deg_str) * 10 / 9
    return fmt_decimal_fixed(gon, decimals, ",")


def meter_str_from_jxl(m_str: str) -> str:
    """Re-format a JXL distance in metres (decimal point) with a comma.

    The number of decimals present in the JXL value is preserved.
    """
    m_str = (m_str or "").strip()
    decimals = count_decimals(m_str, ".")
    return fmt_decimal_fixed(Decimal(m_str), decimals, ",")


def is_obs_line(row: list[str]) -> bool:
    """Return True if *row* is an observation row.

    An observation has a non-empty target name (may be alphanumeric, e.g.
    ``FH3``) and parseable numeric Hz, Z and SD columns.
    """
    if len(row) < 4:
        return False
    if any(cell.strip() == "" for cell in row[:4]):
        return False
    try:
        for cell in row[1:4]:
            parse_decimal_csv(cell)
    except (InvalidOperation, ValueError):
        return False
    return True


def is_station_candidate(row: list[str]) -> bool:
    """Return True if *row* could be a station header.

    That is: first column filled, the three measurement columns empty.
    Whether it really is a station is decided later against the set of
    station names found in the JXL.
    """
    if len(row) < 4:
        return False
    return row[0].strip() != "" and all(cell.strip() == "" for cell in row[1:4])


def csv_is_rounding_of_jxl(csv_str: str, jxl_full_str: str) -> bool:
    """Check whether the CSV value is a rounded form of the JXL value.

    Criteria:
      - the CSV value has fewer decimals than the JXL value, and
      - the JXL value rounded (half-up) to the CSV decimals equals the CSV
        value numerically.

    Unparseable input yields False.
    """
    dc = count_decimals(csv_str, ",")
    dj = count_decimals(jxl_full_str, ",")
    if dc >= dj:
        return False
    try:
        csv_val = parse_decimal_csv(csv_str)
        jxl_val = parse_decimal_comma(jxl_full_str)
        q = _quantum(dc)
        jxl_rounded = jxl_val.quantize(q, rounding=ROUND_HALF_UP)
        csv_q = csv_val.quantize(q, rounding=ROUND_HALF_UP)
    except (InvalidOperation, ValueError):
        return False
    return jxl_rounded == csv_q


def format_last_column(sd_out: str, zh_val: str) -> str:
    """Build the last CSV column: distance plus optional ``:ZH:<height>``.

    If *zh_val* is empty the distance text is returned unchanged.  Otherwise
    an annotation already present in *sd_out* is removed first, so the
    output never contains two ``:ZH:`` parts.
    """
    if zh_val is None or str(zh_val).strip() == "":
        return sd_out
    return f"{_strip_zh(sd_out)}:ZH:{zh_val}"


# ========= Read JXL =========
def load_jxl(jxl_path: str) -> tuple[dict, dict]:
    """Parse the JXL file.

    Returns:
        ``(stationname_to_records, station_seq)`` where
        ``stationname_to_records`` maps a station name to the list of its
        ``(station_id, instrument_height)`` set-ups in document order, and
        ``station_seq`` maps a station ID to the ordered list of its
        observations (dicts with keys ``target``, ``hz_gon``, ``z_gon``,
        ``sd_m``, ``zh``).
    """
    root = ET.parse(jxl_path).getroot()

    # StationRecords: (StationName, StationID, IH)
    station_records = []
    for sr in root.iter("StationRecord"):
        sname = (sr.findtext("StationName") or "").strip()
        sid = (sr.attrib.get("ID") or "").strip()
        ih = (sr.findtext("TheodoliteHeight") or "").strip()
        if sname != "" and sid != "":
            station_records.append((sname, sid, ih))

    # A station name may have several set-ups; keep them in order so the
    # CSV can consume "the next unused one".
    stationname_to_records: dict = {}
    for sname, sid, ih in station_records:
        stationname_to_records.setdefault(sname, []).append((sid, ih))

    # TargetHeight per TargetRecord ID
    target_height_by_id = {}
    for tr in root.iter("TargetRecord"):
        tid = (tr.attrib.get("ID") or "").strip()
        zh = (tr.findtext("TargetHeight") or "").strip()
        if tid != "":
            target_height_by_id[tid] = zh

    # Per station ID: sequence of PointRecords
    station_seq: dict = {sid: [] for _, sid, _ in station_records}
    for pr in root.iter("PointRecord"):
        stid = (pr.findtext("StationID") or "").strip()
        if stid == "" or stid not in station_seq:
            continue

        circle = pr.find("Circle")
        if circle is None:
            continue

        target_name = (pr.findtext("Name") or "").strip()
        target_id = (pr.findtext("TargetID") or "").strip()
        hz_deg = (circle.findtext("HorizontalCircle") or "").strip()
        z_deg = (circle.findtext("VerticalCircle") or "").strip()
        sd_m = (circle.findtext("EDMDistance") or "").strip()

        # Only complete observations take part in the matching.
        if target_name == "" or hz_deg == "" or z_deg == "" or sd_m == "":
            continue

        station_seq[stid].append({
            "target": target_name,
            "hz_gon": deg_to_gon_str(hz_deg),
            "z_gon": deg_to_gon_str(z_deg),
            "sd_m": meter_str_from_jxl(sd_m),
            "zh": target_height_by_id.get(target_id, ""),
        })

    return stationname_to_records, station_seq


# ========= Matching function =========
def pick_jxl_entry_for_obs(seq, start_ptr, zp, hz_csv, z_csv, sd_csv, search_window=200):
    """Pick the JXL entry belonging to one CSV observation.

    Default: take ``seq[start_ptr]``.  If its target does not match *zp*,
    search the next *search_window* entries for that target; with several
    hits prefer the one where most rounded values agree (earliest on ties).

    Returns:
        ``(entry, next_ptr)``; ``(None, start_ptr)`` when *seq* is exhausted.
    """
    if start_ptr >= len(seq):
        return None, start_ptr

    first = seq[start_ptr]
    if first["target"] == zp:
        return first, start_ptr + 1

    end = min(len(seq), start_ptr + search_window)
    candidates = [(i, seq[i]) for i in range(start_ptr, end) if seq[i]["target"] == zp]

    if not candidates:
        # NOTE(review): falls back to an entry whose target name differs;
        # its ZH may then be attached to the wrong target - confirm intent.
        return first, start_ptr + 1

    if len(candidates) == 1:
        i, entry = candidates[0]
        return entry, i + 1

    scored = []
    for i, entry in candidates:
        score = (
            int(csv_is_rounding_of_jxl(hz_csv, entry["hz_gon"]))
            + int(csv_is_rounding_of_jxl(z_csv, entry["z_gon"]))
            + int(csv_is_rounding_of_jxl(sd_csv, entry["sd_m"]))
        )
        scored.append((score, i, entry))

    # Highest score first, then the earliest position.
    scored.sort(key=lambda t: (-t[0], t[1]))
    _, i_best, entry_best = scored[0]
    return entry_best, i_best + 1


# ========= Process CSV =========
def process_csv(csv_in: str, csv_out: str, stationname_to_records: dict, station_seq: dict):
    """Rewrite *csv_in* to *csv_out* with full-precision JXL values.

    Returns:
        ``(repl_counts, missing_ih, missing_zh, missing_station_record)``:
        replacement counters per column, and lists of
        ``(line_no, station)``, ``(line_no, station, target)`` and
        ``(line_no, station_text)`` respectively.

    Raises:
        RuntimeError: if a station occurs more often in the CSV than there
            are StationRecords with that name in the JXL.
    """
    repl_counts = {"Hz": 0, "Z": 0, "SD": 0}
    stationname_usecount = dict.fromkeys(stationname_to_records, 0)

    missing_ih = []              # (line number, station)
    missing_zh = []              # (line number, station, target)
    missing_station_record = []  # (line number, station text)

    current_station_name = None
    current_station_id = None
    current_station_ptr = 0

    with open(csv_in, newline="", encoding="utf-8") as fin, \
            open(csv_out, "w", newline="", encoding="utf-8") as fout:
        reader = csv.reader(fin, delimiter=";")
        writer = csv.writer(fout, delimiter=";", lineterminator="\n")

        for line_no, row in enumerate(reader, start=1):
            if len(row) < 4:
                row = row + [""] * (4 - len(row))

            # ---- station candidate? ----
            if is_station_candidate(row):
                sp = row[0].strip()

                # Only treat it as a station if the JXL knows that name.
                if sp in stationname_to_records:
                    use = stationname_usecount.get(sp, 0)
                    recs = stationname_to_records[sp]
                    if use >= len(recs):
                        raise RuntimeError(f"Standpunkt {sp} kommt in CSV öfter vor als in der JXL (StationRecords).")

                    sid, ih = recs[use]
                    stationname_usecount[sp] = use + 1

                    current_station_name = sp
                    current_station_id = sid
                    current_station_ptr = 0

                    # log missing instrument height
                    if ih is None or str(ih).strip() == "":
                        missing_ih.append((line_no, sp))

                    writer.writerow([sp, f"IH:{ih}", "", "", ""])
                    continue

                # Not in the JXL: log it if it looks like a station number.
                # NOTE(review): the active station is not reset here, so the
                # following observations are still matched against the
                # previous station - confirm this is intended.
                if sp.isdigit():
                    missing_station_record.append((line_no, sp))

                writer.writerow(row)
                continue

            # ---- observation? ----
            if is_obs_line(row) and current_station_id is not None:
                zp = row[0].strip()
                hz_csv = row[1].strip()
                z_csv = row[2].strip()
                sd_csv = row[3].strip()

                seq = station_seq.get(current_station_id, [])
                jxl_entry, new_ptr = pick_jxl_entry_for_obs(
                    seq, current_station_ptr, zp, hz_csv, z_csv, sd_csv
                )

                if jxl_entry is None:
                    writer.writerow(row)
                    continue

                current_station_ptr = new_ptr

                hz_out = hz_csv
                z_out = z_csv
                sd_out = sd_csv

                if csv_is_rounding_of_jxl(hz_csv, jxl_entry["hz_gon"]):
                    hz_out = jxl_entry["hz_gon"]
                    repl_counts["Hz"] += 1

                if csv_is_rounding_of_jxl(z_csv, jxl_entry["z_gon"]):
                    z_out = jxl_entry["z_gon"]
                    repl_counts["Z"] += 1

                if csv_is_rounding_of_jxl(sd_csv, jxl_entry["sd_m"]):
                    sd_out = jxl_entry["sd_m"]
                    repl_counts["SD"] += 1

                # log missing target height
                zh_val = jxl_entry.get("zh", "")
                if zh_val is None or str(zh_val).strip() == "":
                    missing_zh.append((line_no, current_station_name, zp))

                writer.writerow([zp, hz_out, z_out, format_last_column(sd_out, zh_val)])
                continue

            # ---- everything else unchanged ----
            writer.writerow(row)

    return repl_counts, missing_ih, missing_zh, missing_station_record


# ========= Report =========
def _print_section(title: str, entries: list, render) -> None:
    """Print one report section, listing at most ``REPORT_LIMIT`` entries."""
    print(title)
    print("Anzahl:", len(entries))
    for entry in entries[:REPORT_LIMIT]:
        print(render(*entry))
    if len(entries) > REPORT_LIMIT:
        print("... (weitere gekürzt)")


def print_report(csv_out, repl_counts, missing_ih, missing_zh, missing_station_record) -> None:
    """Print the summary of replacements and of missing JXL information."""
    print("Fertig.")
    print("Ausgabe:", csv_out)
    print("Ersetzungen (Rundung -> JXL volle Nachkommastellen):", repl_counts)

    _print_section(
        "\n--- Fehlende IH ---",
        missing_ih,
        lambda z, sp: f"Zeile {z}: Standpunkt {sp} (IH leer in JXL)",
    )
    _print_section(
        "\n--- Fehlende ZH ---",
        missing_zh,
        lambda z, sp, zp: f"Zeile {z}: Standpunkt {sp}, Ziel {zp} (ZH nicht ermittelt)",
    )
    _print_section(
        "\n--- Standpunkt in CSV, aber kein StationRecord in JXL ---",
        missing_station_record,
        lambda z, sp: f"Zeile {z}: Standpunkt {sp} (nicht in JXL als StationName gefunden)",
    )


def main() -> None:
    """Run the correction with the paths configured at the top of the file."""
    stationname_to_records, station_seq = load_jxl(JXL_IN)
    results = process_csv(CSV_IN, CSV_OUT, stationname_to_records, station_seq)
    print_report(CSV_OUT, *results)


if __name__ == "__main__":
    main()