# Masterprojekt_V3/Vorbereitungen_Fabian/Test.py

import csv
import xml.etree.ElementTree as ET
from decimal import Decimal, getcontext, ROUND_HALF_UP

# ========= Settings =========
# JXL file that is parsed with ElementTree to obtain the full-precision values.
JXL_IN = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campusnetz_bereinigt_plus_nachmessung.jxl"
# Semicolon-separated observation CSV that is read as input.
# NOTE(review): the CSV names are spelled "campsnetz" while the JXL name uses
# "campusnetz" - confirm that these are the actual file names on disk.
CSV_IN = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campsnetz_beobachtungen_plus_nachmessungen.csv"
# Output CSV; it is opened in write mode, so an existing file is overwritten.
CSV_OUT = r"C:\Users\fabia\Desktop\Masterprojekt_V3\Daten\campsnetz_beobachtungen_plus_nachmessungen_korrigiert.csv"

# Use 70 significant digits for Decimal arithmetic (e.g. the 10/9 degree-to-gon
# factor) so that intermediate rounding does not affect the printed decimals.
getcontext().prec = 70


# ========= Hilfsfunktionen =========
def count_decimals(s: str, sep: str) -> int:
    """Return the number of characters after the first ``sep`` in ``s``.

    ``None`` and blank strings yield 0. A trailing ``":ZH:..."`` annotation is
    cut off before counting. If ``sep`` does not occur, the result is 0.
    """
    if s is None:
        return 0
    text = s.strip()
    if not text:
        return 0
    # Only the part in front of an optional ":ZH:" annotation is the number.
    number = text.partition(":ZH:")[0].strip()
    _, found, fraction = number.partition(sep)
    return len(fraction) if found else 0


def fmt_decimal_fixed(x: Decimal, decimals: int, sep: str) -> str:
    """Format ``x`` with exactly ``decimals`` decimal places, rounding half-up.

    The result is written in plain (non-scientific) notation and uses ``sep``
    as the decimal separator.
    """
    # The exponent of this template tells quantize() how many places to keep.
    template = "1" if decimals == 0 else "1." + "0" * decimals
    rounded = x.quantize(Decimal(template), rounding=ROUND_HALF_UP)
    rendered = format(rounded, "f")
    if sep != ".":
        rendered = rendered.replace(".", sep)
    if decimals == 0:
        rendered = rendered.split(sep)[0]
    return rendered


def parse_decimal_csv(s: str) -> Decimal:
    """Parse a CSV number that uses a decimal comma into a ``Decimal``.

    A ``":ZH:..."`` annotation that may follow the number (last CSV field) is
    ignored. ``None`` is treated like an empty string, which ``Decimal``
    rejects.
    """
    number, _, _ = (s or "").strip().partition(":ZH:")
    return Decimal(number.strip().replace(",", "."))


def parse_decimal_comma(s: str) -> Decimal:
    """Convert a decimal-comma string into a ``Decimal``.

    ``None`` is treated like an empty string, which ``Decimal`` rejects.
    """
    text = s if s else ""
    normalized = text.strip().replace(",", ".")
    return Decimal(normalized)


def deg_to_gon_str(deg_str: str) -> str:
    """Convert a decimal-degree string into a gon string.

    The input uses "." as decimal separator. The value is multiplied by 10/9
    and formatted with exactly as many decimals as the input string shows,
    using "," as decimal separator.
    """
    text = deg_str.strip() if deg_str else ""
    digits = count_decimals(text, ".")
    degrees_to_gon = Decimal(10) / Decimal(9)
    return fmt_decimal_fixed(Decimal(text) * degrees_to_gon, digits, ",")


def meter_str_from_jxl(m_str: str) -> str:
    """Re-format a distance string from "." to "," as decimal separator.

    The number of decimals shown in the input string is kept unchanged.
    """
    text = m_str.strip() if m_str else ""
    digits = count_decimals(text, ".")
    distance = Decimal(text)
    return fmt_decimal_fixed(distance, digits, ",")


def is_obs_line(row: list[str]) -> bool:
    """Tell whether ``row`` is an observation line.

    An observation line has a non-empty target in column 0 and non-empty
    columns 1-3 (Hz, Z, SD) that ``parse_decimal_csv`` can parse. The target
    itself may be alphanumeric (e.g. "FH3").
    """
    if len(row) < 4:
        return False
    if any(cell.strip() == "" for cell in row[:4]):
        return False
    try:
        for measurement in row[1:4]:
            parse_decimal_csv(measurement)
    except Exception:
        return False
    return True


def is_station_candidate(row: list[str]) -> bool:
    """Tell whether ``row`` could be a station header line.

    A candidate has a non-empty first column while columns 1-3 (the
    measurement columns) are blank. Whether the name really is a station is
    not decided here.
    """
    if len(row) < 4:
        return False
    name, *measurements = row[:4]
    return name.strip() != "" and all(cell.strip() == "" for cell in measurements)


def csv_is_rounding_of_jxl(csv_str: str, jxl_full_str: str) -> bool:
    """Check whether the CSV value is a rounded form of the JXL value.

    Both strings use "," as decimal separator. The result is True only if
      - the CSV value shows fewer decimals than the JXL value, and
      - the JXL value rounded half-up to the CSV's number of decimals equals
        the CSV value numerically.
    Values that cannot be parsed yield False.
    """
    csv_digits = count_decimals(csv_str, ",")
    jxl_digits = count_decimals(jxl_full_str, ",")
    if not csv_digits < jxl_digits:
        return False

    try:
        csv_number = parse_decimal_csv(csv_str)
        jxl_number = parse_decimal_comma(jxl_full_str)

        template = "1" if csv_digits == 0 else "1." + "0" * csv_digits
        quantum = Decimal(template)
        jxl_at_csv_precision = jxl_number.quantize(quantum, rounding=ROUND_HALF_UP)
        csv_at_csv_precision = csv_number.quantize(quantum, rounding=ROUND_HALF_UP)
        return jxl_at_csv_precision == csv_at_csv_precision
    except Exception:
        return False


# ========= Read the JXL file =========
tree = ET.parse(JXL_IN)
root = tree.getroot()


def _stripped_text(node, tag):
    """Return the stripped text of the sub-element ``tag`` of ``node`` ("" if missing)."""
    return (node.findtext(tag) or "").strip()


# StationRecords as (StationName, StationID, theodolite height); records
# without a name or without an ID are ignored.
station_records = []
for station_node in root.iter("StationRecord"):
    record = (
        _stripped_text(station_node, "StationName"),
        (station_node.attrib.get("ID") or "").strip(),
        _stripped_text(station_node, "TheodoliteHeight"),
    )
    if record[0] and record[1]:
        station_records.append(record)

station_names_set = {record[0] for record in station_records}

# A station name may have several set-ups; they are kept in file order so
# that the next unused one can be taken each time the name shows up.
stationname_to_records = {}
for station_name, station_id, theodolite_height in station_records:
    stationname_to_records.setdefault(station_name, []).append((station_id, theodolite_height))
stationname_usecount = dict.fromkeys(stationname_to_records, 0)

# TargetHeight text per TargetRecord ID.
target_height_by_id = {}
for target_node in root.iter("TargetRecord"):
    target_key = (target_node.attrib.get("ID") or "").strip()
    if target_key:
        target_height_by_id[target_key] = _stripped_text(target_node, "TargetHeight")

# Per StationID: the PointRecords of that set-up in file order.
station_seq = {record[1]: [] for record in station_records}

for point_node in root.iter("PointRecord"):
    station_ref = _stripped_text(point_node, "StationID")
    if not station_ref or station_ref not in station_seq:
        continue

    circle_node = point_node.find("Circle")
    if circle_node is None:
        continue

    point_name = _stripped_text(point_node, "Name")
    target_ref = _stripped_text(point_node, "TargetID")

    hz_text = _stripped_text(circle_node, "HorizontalCircle")
    z_text = _stripped_text(circle_node, "VerticalCircle")
    sd_text = _stripped_text(circle_node, "EDMDistance")

    # Only complete observations (name, both circle readings, distance) are kept.
    if not (point_name and hz_text and z_text and sd_text):
        continue

    station_seq[station_ref].append({
        "target": point_name,
        "hz_gon": deg_to_gon_str(hz_text),
        "z_gon": deg_to_gon_str(z_text),
        "sd_m": meter_str_from_jxl(sd_text),
        "zh": target_height_by_id.get(target_ref, ""),
    })


# ========= Matching-Funktion =========
def pick_jxl_entry_for_obs(seq, start_ptr, zp, hz_csv, z_csv, sd_csv, search_window=200):
    """Select the JXL entry that belongs to one CSV observation.

    Returns ``(entry, next_ptr)``:
      - ``(None, start_ptr)`` if ``start_ptr`` is past the end of ``seq``.
      - ``seq[start_ptr]`` if its target equals ``zp``.
      - Otherwise the entries from ``start_ptr`` up to ``search_window``
        positions ahead are searched for target ``zp``. With no hit,
        ``seq[start_ptr]`` is returned anyway; with one hit, that entry.
        With several hits, the entry for which most of Hz/Z/SD are roundings
        of the JXL value wins, the earliest one on a tie.
    ``next_ptr`` is the index right after the returned entry.
    """
    if start_ptr >= len(seq):
        return None, start_ptr

    default_entry = seq[start_ptr]
    if default_entry["target"] == zp:
        return default_entry, start_ptr + 1

    window_end = min(len(seq), start_ptr + search_window)
    matches = [
        (idx, seq[idx])
        for idx in range(start_ptr, window_end)
        if seq[idx]["target"] == zp
    ]

    if not matches:
        return default_entry, start_ptr + 1

    if len(matches) == 1:
        idx, entry = matches[0]
        return entry, idx + 1

    def rounding_hits(entry):
        """Count how many of Hz, Z and SD are roundings of this entry's values."""
        checks = (
            csv_is_rounding_of_jxl(hz_csv, entry["hz_gon"]),
            csv_is_rounding_of_jxl(z_csv, entry["z_gon"]),
            csv_is_rounding_of_jxl(sd_csv, entry["sd_m"]),
        )
        return sum(checks)

    # Highest hit count first; the smaller index breaks ties.
    best_idx, best_entry = min(matches, key=lambda pair: (-rounding_hits(pair[1]), pair[0]))
    return best_entry, best_idx + 1


# ========= Process the CSV =========
# Number of CSV values replaced by the full-precision JXL value, per column.
repl_counts = {"Hz": 0, "Z": 0, "SD": 0}
# Set-up (StationRecord ID) currently being processed and the read position
# inside that set-up's JXL observation sequence.
current_station_id = None
current_station_ptr = 0

line_no = 0

fehlende_IH = []                 # (line number, station)
fehlende_ZH = []                 # (line number, station, target)
fehlender_StationRecord = []     # (line number, station text)

current_station_name = None

with open(CSV_IN, newline="", encoding="utf-8") as fin, open(CSV_OUT, "w", newline="", encoding="utf-8") as fout:
    reader = csv.reader(fin, delimiter=";")
    writer = csv.writer(fout, delimiter=";", lineterminator="\n")

    for row in reader:
        line_no += 1

        # Pad short rows so that the four expected columns can be indexed.
        if len(row) < 4:
            row = row + [""] * (4 - len(row))

        # ---- Station candidate? ----
        if is_station_candidate(row):
            sp = row[0].strip()

            # Only treat it as a station if the JXL really has a StationName for it.
            if sp in station_names_set:
                use = stationname_usecount.get(sp, 0)
                recs = stationname_to_records[sp]
                if use >= len(recs):
                    raise RuntimeError(f"Standpunkt {sp} kommt in CSV öfter vor als in der JXL (StationRecords).")

                # Take the next unused set-up of this station name.
                sid, ih = recs[use]
                stationname_usecount[sp] = use + 1

                current_station_name = sp
                current_station_id = sid
                current_station_ptr = 0

                # Log a missing instrument height.
                if ih is None or str(ih).strip() == "":
                    fehlende_IH.append((line_no, sp))

                writer.writerow([sp, f"IH:{ih}", "", "", ""])
                continue

            # Not in the JXL: if it looks like a station (digits only), log it.
            if sp.isdigit():
                fehlender_StationRecord.append((line_no, sp))
                # The observations that follow belong to a set-up the JXL does
                # not know. Forget the previous station so they are passed
                # through unchanged instead of being matched against the JXL
                # sequence of an unrelated set-up.
                current_station_name = None
                current_station_id = None
                current_station_ptr = 0

            writer.writerow(row)
            continue

        # ---- Observation? ----
        if is_obs_line(row) and current_station_id is not None:
            zp = row[0].strip()
            hz_csv = row[1].strip()
            z_csv = row[2].strip()
            sd_csv = row[3].strip()

            seq = station_seq.get(current_station_id, [])
            jxl_entry, new_ptr = pick_jxl_entry_for_obs(seq, current_station_ptr, zp, hz_csv, z_csv, sd_csv)

            # No JXL entry left for this set-up: keep the row as it is.
            if jxl_entry is None:
                writer.writerow(row)
                continue

            current_station_ptr = new_ptr

            # The SD field may already carry a ":ZH:<height>" annotation.
            # Separate it so that it is neither duplicated nor silently lost.
            sd_plain, _, zh_csv = sd_csv.partition(":ZH:")
            sd_plain = sd_plain.strip()
            zh_csv = zh_csv.strip()

            hz_out = hz_csv
            z_out = z_csv
            sd_out = sd_plain

            # Replace a value only if it is a rounding of the JXL value.
            if csv_is_rounding_of_jxl(hz_csv, jxl_entry["hz_gon"]):
                hz_out = jxl_entry["hz_gon"]
                repl_counts["Hz"] += 1

            if csv_is_rounding_of_jxl(z_csv, jxl_entry["z_gon"]):
                z_out = jxl_entry["z_gon"]
                repl_counts["Z"] += 1

            if csv_is_rounding_of_jxl(sd_csv, jxl_entry["sd_m"]):
                sd_out = jxl_entry["sd_m"]
                repl_counts["SD"] += 1

            # Prefer the target height from the JXL; fall back to the one the
            # CSV already carried. Log the observation if neither is available.
            zh_val = str(jxl_entry.get("zh") or "").strip() or zh_csv
            if zh_val == "":
                fehlende_ZH.append((line_no, current_station_name, zp))

            last_col = f"{sd_out}:ZH:{zh_val}" if zh_val != "" else sd_out
            writer.writerow([zp, hz_out, z_out, last_col])
            continue

        # ---- Everything else is written without modification ----
        writer.writerow(row)

def _print_report_section(heading, entries, describe, limit=50):
    """Print ``heading``, the entry count and at most ``limit`` described entries."""
    print(heading)
    print("Anzahl:", len(entries))
    for entry in entries[:limit]:
        print(describe(*entry))
    if len(entries) > limit:
        print("... (weitere gekürzt)")


# Summary of the run: output path and number of replaced values per column.
print("Fertig.")
print("Ausgabe:", CSV_OUT)
print("Ersetzungen (Rundung -> JXL volle Nachkommastellen):", repl_counts)

# Stations whose instrument height is empty in the JXL.
_print_report_section(
    "\n--- Fehlende IH ---",
    fehlende_IH,
    lambda z, sp: f"Zeile {z}: Standpunkt {sp} (IH leer in JXL)",
)

# Observations for which no target height was found.
_print_report_section(
    "\n--- Fehlende ZH ---",
    fehlende_ZH,
    lambda z, sp, zp: f"Zeile {z}: Standpunkt {sp}, Ziel {zp} (ZH nicht ermittelt)",
)

# Numeric station lines of the CSV without a matching StationName in the JXL.
_print_report_section(
    "\n--- Standpunkt in CSV, aber kein StationRecord in JXL ---",
    fehlender_StationRecord,
    lambda z, sp: f"Zeile {z}: Standpunkt {sp} (nicht in JXL als StationName gefunden)",
)