Push

2026-01-22 08:51:18 +01:00
parent 7a170f5ead
commit 7c1b335b1c
5 changed files with 49478 additions and 49398 deletions
--- a/Vorbereitungen_Fabian/Test.py
+++ b/Vorbereitungen_Fabian/Test.py
@@ -1,6 +1,361 @@
-i = 5
+import csv
+import xml.etree.ElementTree as ET
+from decimal import Decimal, getcontext, ROUND_HALF_UP

-if i % 6 != 0:
-        print("nein")
-else:
-    print("ja")
+# ========= Settings =========
+# Input JXL file; parsed with ElementTree further down in this script.
+JXL_IN = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campusnetz_bereinigt_plus_nachmessung.jxl"
+# Input CSV with the observations; read with ";" as the delimiter.
+CSV_IN = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campsnetz_beobachtungen_plus_nachmessungen.csv"
+# Output CSV; opened in "w" mode, so an existing file is overwritten.
+CSV_OUT = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campsnetz_beobachtungen_plus_nachmessungen_korrigiert.csv"
+# NOTE(review): the paths above are absolute and machine-specific — adjust before running elsewhere.
+
+# Precision (significant digits) of the active Decimal context used by all
+# Decimal arithmetic below.
+getcontext().prec = 70
+
+
+# ========= Hilfsfunktionen =========
+def count_decimals(s: str, sep: str) -> int:
+    """
+    Return how many characters follow the first ``sep`` in ``s``.
+
+    ``None``, empty and whitespace-only input yield 0. A trailing
+    ":ZH:..." suffix is cut off before counting. If ``sep`` does not
+    occur, the result is 0.
+    """
+    text = (s or "").strip()
+    if not text:
+        return 0
+
+    # Only the part in front of an appended ":ZH:..." marker is the number.
+    head, marker, _ = text.partition(":ZH:")
+    if marker:
+        text = head.strip()
+
+    _, found, fraction = text.partition(sep)
+    return len(fraction) if found else 0
+
+
+def fmt_decimal_fixed(x: Decimal, decimals: int, sep: str) -> str:
+    """
+    Format ``x`` with exactly ``decimals`` fractional digits.
+
+    The value is rounded with ROUND_HALF_UP, rendered in plain (non-exponent)
+    notation, and the decimal point is replaced by ``sep``.
+    """
+    quantum = Decimal("1." + ("0" * decimals)) if decimals != 0 else Decimal("1")
+    rendered = format(x.quantize(quantum, rounding=ROUND_HALF_UP), "f")
+    rendered = rendered if sep == "." else rendered.replace(".", sep)
+    if decimals == 0:
+        # Keep only the integer part in front of the separator.
+        rendered = rendered.split(sep)[0]
+    return rendered
+
+
+def parse_decimal_csv(s: str) -> Decimal:
+    """
+    Parse a CSV number that uses a decimal comma into a Decimal.
+
+    Anything from a ":ZH:" marker onwards is ignored, so the last CSV field
+    may carry an appended ":ZH:..." suffix.
+    """
+    number_part = (s or "").strip().partition(":ZH:")[0]
+    return Decimal(number_part.strip().replace(",", "."))
+
+
+def parse_decimal_comma(s: str) -> Decimal:
+    """
+    Convert a decimal-comma string to a Decimal.
+
+    Surrounding whitespace is ignored; every comma is treated as the
+    decimal point.
+    """
+    text = "" if not s else s
+    normalized = text.strip().replace(",", ".")
+    return Decimal(normalized)
+
+
+def deg_to_gon_str(deg_str: str) -> str:
+    """
+    Convert an angle given in degrees (decimal point) to gon.
+
+    gon = deg * (10/9). The result keeps exactly as many fractional digits
+    as the degree string has and uses a comma as the decimal separator.
+    """
+    text = (deg_str or "").strip()
+    factor = Decimal(10) / Decimal(9)
+    return fmt_decimal_fixed(Decimal(text) * factor, count_decimals(text, "."), ",")
+
+
+def meter_str_from_jxl(m_str: str) -> str:
+    """
+    Re-format a distance given in metres (decimal point) with a decimal comma.
+
+    The number of fractional digits of the input string is preserved.
+    """
+    text = (m_str or "").strip()
+    places = count_decimals(text, ".")
+    value = Decimal(text)
+    return fmt_decimal_fixed(value, places, ",")
+
+
+def is_obs_line(row: list[str]) -> bool:
+    """
+    Tell whether ``row`` is an observation line.
+
+    An observation line has at least four cells, a non-empty target point in
+    the first cell, and Hz/Z/SD values in cells 1-3 that parse as decimal
+    numbers. The target point may be alphanumeric (FH3 etc.).
+
+    Returns False for short rows, rows with an empty cell among the first
+    four, and rows whose measurement cells do not parse.
+    """
+    from decimal import InvalidOperation
+
+    if len(row) < 4:
+        return False
+    if any(cell.strip() == "" for cell in row[:4]):
+        return False
+    try:
+        for cell in row[1:4]:
+            parse_decimal_csv(cell)
+    except (InvalidOperation, ValueError):
+        # Only a failed number conversion means "not an observation";
+        # any other error is a real bug and must not be swallowed.
+        return False
+    return True
+
+
+def is_station_candidate(row: list[str]) -> bool:
+    """
+    Tell whether ``row`` could be a station line.
+
+    A candidate has at least four cells, a non-empty first cell and empty
+    measurement cells (1-3). Whether it really is a station is decided by
+    the caller against the set of station names.
+    """
+    if len(row) < 4:
+        return False
+    if not row[0].strip():
+        return False
+    return not any(cell.strip() for cell in row[1:4])
+
+
+def csv_is_rounding_of_jxl(csv_str: str, jxl_full_str: str) -> bool:
+    """
+    Check whether the CSV value is a rounded representation of the JXL value.
+
+    Both strings use a decimal comma. The result is True only if
+      - the CSV value has fewer fractional digits than the JXL value, and
+      - the JXL value rounded (ROUND_HALF_UP) to the CSV's number of
+        fractional digits equals the CSV value numerically.
+
+    Values that cannot be parsed or quantized yield False.
+    """
+    from decimal import InvalidOperation
+
+    csv_places = count_decimals(csv_str, ",")
+    if csv_places >= count_decimals(jxl_full_str, ","):
+        return False
+
+    quantum = Decimal("1") if csv_places == 0 else Decimal("1." + ("0" * csv_places))
+    try:
+        csv_val = parse_decimal_csv(csv_str)
+        jxl_val = parse_decimal_comma(jxl_full_str)
+        jxl_rounded = jxl_val.quantize(quantum, rounding=ROUND_HALF_UP)
+        csv_rounded = csv_val.quantize(quantum, rounding=ROUND_HALF_UP)
+        return jxl_rounded == csv_rounded
+    except (InvalidOperation, ValueError):
+        # Unparseable or non-quantizable input simply is "not a rounding";
+        # unrelated errors are deliberately not caught here.
+        return False
+
+
+# ========= Read JXL =========
+tree = ET.parse(JXL_IN)
+root = tree.getroot()
+
+# StationRecords as (StationName, StationID, IH) tuples, where IH is the
+# TheodoliteHeight text. Records without a name or without an ID are skipped.
+station_records = []
+for sr in root.iter("StationRecord"):
+    record = (
+        (sr.findtext("StationName") or "").strip(),
+        (sr.attrib.get("ID") or "").strip(),
+        (sr.findtext("TheodoliteHeight") or "").strip(),
+    )
+    if record[0] and record[1]:
+        station_records.append(record)
+
+station_names_set = {name for name, _, _ in station_records}
+
+# A station name may have several set-ups; the CSV pass later takes the
+# "next unused" one, tracked by the per-name use counter.
+stationname_to_records = {}
+for name, station_id, height in station_records:
+    stationname_to_records.setdefault(name, []).append((station_id, height))
+stationname_usecount = dict.fromkeys(stationname_to_records, 0)
+
+# TargetHeight text per TargetRecord ID; records without an ID are skipped,
+# and a repeated ID keeps the value of its last record.
+target_height_by_id = {}
+for tr in root.iter("TargetRecord"):
+    record_id = (tr.attrib.get("ID") or "").strip()
+    if not record_id:
+        continue
+    target_height_by_id[record_id] = (tr.findtext("TargetHeight") or "").strip()
+
+# Per StationID: the sequence of its PointRecords, in the order root.iter yields them.
+station_seq = {record[1]: [] for record in station_records}
+
+for pr in root.iter("PointRecord"):
+    station_ref = (pr.findtext("StationID") or "").strip()
+    if not station_ref or station_ref not in station_seq:
+        continue
+
+    circle = pr.find("Circle")
+    if circle is None:
+        continue
+
+    target_name = (pr.findtext("Name") or "").strip()
+    target_id = (pr.findtext("TargetID") or "").strip()
+    hz_deg, z_deg, sd_m = (
+        (circle.findtext(tag) or "").strip()
+        for tag in ("HorizontalCircle", "VerticalCircle", "EDMDistance")
+    )
+
+    # Records lacking a name or any of the three readings are skipped.
+    if not (target_name and hz_deg and z_deg and sd_m):
+        continue
+
+    observation = {
+        "target": target_name,
+        "hz_gon": deg_to_gon_str(hz_deg),
+        "z_gon": deg_to_gon_str(z_deg),
+        "sd_m": meter_str_from_jxl(sd_m),
+        "zh": target_height_by_id.get(target_id, ""),
+    }
+    station_seq[station_ref].append(observation)
+
+
+# ========= Matching-Funktion =========
+def pick_jxl_entry_for_obs(seq, start_ptr, zp, hz_csv, z_csv, sd_csv, search_window=200):
+    """
+    Choose the JXL entry that belongs to one CSV observation.
+
+    Returns ``(entry, next_ptr)``:
+      - ``(None, start_ptr)`` if ``start_ptr`` is past the end of ``seq``;
+      - ``seq[start_ptr]`` if its target equals ``zp``;
+      - otherwise the entries from ``start_ptr`` up to ``search_window``
+        positions ahead are searched for target ``zp``. With no hit the
+        entry at ``start_ptr`` is used anyway; with one hit that hit is
+        used; with several hits the one where most of Hz/Z/SD are roundings
+        of the JXL values wins, the earliest on a tie.
+    ``next_ptr`` is the index right after the chosen entry.
+    """
+    if start_ptr >= len(seq):
+        return None, start_ptr
+
+    head = seq[start_ptr]
+    if head["target"] == zp:
+        return head, start_ptr + 1
+
+    window_end = min(len(seq), start_ptr + search_window)
+    matches = [idx for idx in range(start_ptr, window_end) if seq[idx]["target"] == zp]
+
+    if not matches:
+        return head, start_ptr + 1
+
+    if len(matches) == 1:
+        only = matches[0]
+        return seq[only], only + 1
+
+    def rounding_hits(idx):
+        # Number of the three readings for which the CSV value is a rounding
+        # of this candidate's JXL value.
+        entry = seq[idx]
+        return sum((
+            csv_is_rounding_of_jxl(hz_csv, entry["hz_gon"]),
+            csv_is_rounding_of_jxl(z_csv, entry["z_gon"]),
+            csv_is_rounding_of_jxl(sd_csv, entry["sd_m"]),
+        ))
+
+    # Highest score first, lowest index as tie-breaker.
+    best = min(matches, key=lambda idx: (-rounding_hits(idx), idx))
+    return seq[best], best + 1
+
+
+# ========= Process CSV =========
+line_no = 0
+
+# Station currently in effect while walking through the CSV, plus the read
+# position inside that station's JXL sequence.
+current_station_name = None
+current_station_id = None
+current_station_ptr = 0
+
+# How many CSV values were replaced per column.
+repl_counts = dict.fromkeys(("Hz", "Z", "SD"), 0)
+
+# Findings collected for the report at the end of the run.
+fehlende_IH = []                 # (line number, station)
+fehlende_ZH = []                 # (line number, station, target point)
+fehlender_StationRecord = []     # (line number, station text)
+
+# Stream the input CSV row by row and write the corrected CSV. Station lines
+# get the IH from the JXL, observation lines get full-precision JXL values
+# (where the CSV value is a rounding of them) plus the target height ZH;
+# every other row is written exactly as it was read.
+with open(CSV_IN, newline="", encoding="utf-8") as fin, open(CSV_OUT, "w", newline="", encoding="utf-8") as fout:
+    reader = csv.reader(fin, delimiter=";")
+    writer = csv.writer(fout, delimiter=";", lineterminator="\n")
+
+    for line_no, raw_row in enumerate(reader, start=1):
+        # Pad a *copy* to four cells so classification and indexing are safe.
+        # Pass-through rows are written from raw_row, so short or blank lines
+        # are not turned into ";;;" in the output.
+        row = raw_row + [""] * (4 - len(raw_row))
+
+        # ---- Station candidate? ----
+        if is_station_candidate(row):
+            sp = row[0].strip()
+
+            # Only treat it as a station if the JXL really has this StationName.
+            if sp in station_names_set:
+                use = stationname_usecount.get(sp, 0)
+                recs = stationname_to_records[sp]
+                if use >= len(recs):
+                    raise RuntimeError(f"Standpunkt {sp} kommt in CSV öfter vor als in der JXL (StationRecords).")
+
+                # Repeated set-ups of one station consume its JXL records in order.
+                sid, ih = recs[use]
+                stationname_usecount[sp] = use + 1
+
+                current_station_name = sp
+                current_station_id = sid
+                current_station_ptr = 0
+
+                # Log a missing IH.
+                if ih is None or str(ih).strip() == "":
+                    fehlende_IH.append((line_no, sp))
+
+                writer.writerow([sp, f"IH:{ih}", "", "", ""])
+                continue
+
+            # Not in the JXL: log it if it looks like a station (all digits).
+            if sp.isdigit():
+                fehlender_StationRecord.append((line_no, sp))
+
+            writer.writerow(raw_row)
+            continue
+
+        # ---- Observation? ----
+        if is_obs_line(row) and current_station_id is not None:
+            zp = row[0].strip()
+            hz_csv = row[1].strip()
+            z_csv = row[2].strip()
+            sd_csv = row[3].strip()
+
+            seq = station_seq.get(current_station_id, [])
+            jxl_entry, new_ptr = pick_jxl_entry_for_obs(seq, current_station_ptr, zp, hz_csv, z_csv, sd_csv)
+
+            # JXL sequence exhausted: leave the row untouched.
+            if jxl_entry is None:
+                writer.writerow(raw_row)
+                continue
+
+            current_station_ptr = new_ptr
+
+            hz_out = hz_csv
+            z_out = z_csv
+            sd_out = sd_csv
+
+            # Replace a CSV value only if it is a rounding of the JXL value.
+            if csv_is_rounding_of_jxl(hz_csv, jxl_entry["hz_gon"]):
+                hz_out = jxl_entry["hz_gon"]
+                repl_counts["Hz"] += 1
+
+            if csv_is_rounding_of_jxl(z_csv, jxl_entry["z_gon"]):
+                z_out = jxl_entry["z_gon"]
+                repl_counts["Z"] += 1
+
+            if csv_is_rounding_of_jxl(sd_csv, jxl_entry["sd_m"]):
+                sd_out = jxl_entry["sd_m"]
+                repl_counts["SD"] += 1
+
+            zh_val = jxl_entry.get("zh", "")
+            if zh_val is None or str(zh_val).strip() == "":
+                # Log a missing ZH and keep the distance cell as it is.
+                fehlende_ZH.append((line_no, current_station_name, zp))
+                last_col = sd_out
+            else:
+                # An unreplaced CSV cell may already end in ":ZH:..."; cut that
+                # off so the output never carries two ":ZH:" suffixes.
+                sd_plain = sd_out.split(":ZH:", 1)[0].strip()
+                last_col = f"{sd_plain}:ZH:{zh_val}"
+
+            writer.writerow([zp, hz_out, z_out, last_col])
+            continue
+
+        # ---- Everything else: unchanged ----
+        writer.writerow(raw_row)
+
+def _print_capped(title, entries, describe, limit=50):
+    """Print ``title``, the entry count and at most ``limit`` described entries."""
+    print(title)
+    print("Anzahl:", len(entries))
+    for entry in entries[:limit]:
+        print(describe(*entry))
+    if len(entries) > limit:
+        print("... (weitere gekürzt)")
+
+
+# Summary of the run: output path and number of replaced values per column.
+print("Fertig.")
+print("Ausgabe:", CSV_OUT)
+print("Ersetzungen (Rundung -> JXL volle Nachkommastellen):", repl_counts)
+
+_print_capped(
+    "\n--- Fehlende IH ---",
+    fehlende_IH,
+    lambda z, sp: f"Zeile {z}: Standpunkt {sp} (IH leer in JXL)",
+)
+
+_print_capped(
+    "\n--- Fehlende ZH ---",
+    fehlende_ZH,
+    lambda z, sp, zp: f"Zeile {z}: Standpunkt {sp}, Ziel {zp} (ZH nicht ermittelt)",
+)
+
+_print_capped(
+    "\n--- Standpunkt in CSV, aber kein StationRecord in JXL ---",
+    fehlender_StationRecord,
+    lambda z, sp: f"Zeile {z}: Standpunkt {sp} (nicht in JXL als StationName gefunden)",
+)