From b379d46b23b43c680820a997a6dd981b428df57c Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Mon, 13 Oct 2025 14:29:01 -0600 Subject: [PATCH 1/5] feat(counter): distinguish transient vs impulsive anomalies --- das_anomaly/count/counter.py | 227 ++++++++++++++++++++++++------- tests/test_count/test_counter.py | 51 ++++--- 2 files changed, 207 insertions(+), 71 deletions(-) diff --git a/das_anomaly/count/counter.py b/das_anomaly/count/counter.py index 345bdbf..876c559 100644 --- a/das_anomaly/count/counter.py +++ b/das_anomaly/count/counter.py @@ -1,83 +1,208 @@ -""" -das_anomaly.count_counter -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Count how many lines in the *results* folder contain the keyword -“anomaly” (or any keyword you choose) and write a summary file. - -Example -------- ->>> from das_anomaly.count.counter import CounterConfig, AnomalyCounter ->>> cfg = CounterConfig(keyword="anomaly") ->>> anomalies = AnomalyCounter(cfg).run() ->>> print(anomalies) -""" - from __future__ import annotations +import re from dataclasses import dataclass +from datetime import datetime, timedelta from pathlib import Path +from typing import Any from das_anomaly import search_keyword_in_files from das_anomaly.settings import SETTINGS +# Helpers +_PATH_RE = re.compile(r"(/[^:\n]+?\.(?:png|jpg|jpeg|tif|tiff|bmp))", re.IGNORECASE) +_TIME_RE = re.compile( + r"(?P<y1>\d{4})_(?P<m1>\d{2})_(?P<d1>\d{2})T(?P<H1>\d{2})_(?P<M1>\d{2})_(?P<S1>\d{2})__" + r"(?P<y2>\d{4})_(?P<m2>\d{2})_(?P<d2>\d{2})T(?P<H2>\d{2})_(?P<M2>\d{2})_(?P<S2>\d{2})" +) + + +def _extract_path(line: str) -> str | None: + m = _PATH_RE.search(line) + return m.group(1) if m else None + + +def _parse_window_from_path(p: str) -> tuple[datetime, datetime] | None: + base = Path(p).name + m = _TIME_RE.search(base) + if not m: + return None + try: + start = datetime( + int(m["y1"]), + int(m["m1"]), + int(m["d1"]), + int(m["H1"]), + int(m["M1"]), + int(m["S1"]), + ) + end = datetime( + int(m["y2"]), + int(m["m2"]), + int(m["d2"]), + int(m["H2"]), + int(m["M2"]), + int(m["S2"]), + ) + return start, end + except ValueError: + return None + -# ------------------------------------------------------------------ # # Configuration object # -# ------------------------------------------------------------------ # @dataclass class CounterConfig: - """Where to look and what to search for.""" - - # folders results_path: Path | str = SETTINGS.RESULTS_PATH - - # search term keyword: str = "anomaly" + # counting behavior + collapse_transients: bool = False + max_gap_seconds: int = 0 + + # NEW: also report transient vs impulsive breakdown + classify_types: bool = False # if True, report counts for transient & impulsive + def __post_init__(self): self.results_path = Path(self.results_path).expanduser() self.target_dir = self.results_path / "count" self.target_dir.mkdir(parents=True, exist_ok=True) - # derived output file @property def summary_file(self) -> Path: - """Configurate file path for writing counted results.""" - return self.target_dir / f"Counted_{self.keyword}.txt" + if self.classify_types: + suffix = "_classified" + else: + suffix = "_collapsed" if self.collapse_transients else "" + return self.target_dir / f"Counted_{self.keyword}{suffix}.txt" -# ------------------------------------------------------------------ # # Counter # -# ------------------------------------------------------------------ # class AnomalyCounter: - """ - Tally keyword hits in text outputs from *detect_anomalies*. 
- - Parameters - ---------- - cfg: - A :class:`CounterConfig` instance describing paths and keyword. - - Returns - ------- - int - Total number of matching lines. - """ - def __init__(self, cfg: CounterConfig): self.cfg = cfg - def run(self) -> int: - """Perform the search, write a summary file, and return the count.""" - total, lines = search_keyword_in_files(self.cfg.results_path, self.cfg.keyword) - - summary_line = f"Total detected '{self.cfg.keyword}': {total}" + def _prepare_items(self, lines: list[str]) -> list[dict[str, Any]]: + items: list[dict[str, Any]] = [] + for ln in lines: + p = _extract_path(ln) + if not p: + continue + tw = _parse_window_from_path(p) + # sort by start-time when available, else by BASENAME (deterministic) + name = Path(p).name + sort_key = (tw[0], tw[1]) if tw else (datetime.max, name) + items.append({"line": ln, "path": p, "tw": tw, "sort_key": sort_key}) + items.sort(key=lambda d: d["sort_key"]) + return items + + def _group_windows(self, items: list[dict[str, Any]]) -> list[list[dict[str, Any]]]: + """Group overlapping/adjacent windows using max_gap_seconds.""" + if not items: + return [] + tol = timedelta(seconds=max(0, int(self.cfg.max_gap_seconds))) + groups: list[list[dict[str, Any]]] = [] + for it in items: + if not groups: + groups.append([it]) + continue + prev = groups[-1][-1] + prev_tw, curr_tw = prev["tw"], it["tw"] + if prev_tw and curr_tw and (curr_tw[0] <= (prev_tw[1] + tol)): + groups[-1].append(it) + else: + groups.append([it]) + return groups + + def _group_transients(self, lines: list[str]) -> tuple[int, list[str]]: + """(Existing) return collapsed count and one representative per group.""" + items = self._prepare_items(lines) + groups = self._group_windows(items) + reps = [g[0]["line"] for g in groups] + return len(groups), reps + + # NEW: classify transient vs impulsive + def _classify_groups( + self, lines: list[str] + ) -> tuple[int, int, int, list[str], list[str]]: + """ + Return + ------ + n_groups, 
n_transient, n_impulsive, transient_reps, impulsive_reps + """ + items = self._prepare_items(lines) + groups = self._group_windows(items) + + n_groups = len(groups) + transient_reps: list[str] = [] + impulsive_reps: list[str] = [] + + for g in groups: + if len(g) >= 2: + transient_reps.append(g[0]["line"]) + else: + impulsive_reps.append(g[0]["line"]) + + n_transient = len(transient_reps) + n_impulsive = len(impulsive_reps) + return n_groups, n_transient, n_impulsive, transient_reps, impulsive_reps + + def run( + self, + collapse_transients: bool | None = None, + classify_types: bool | None = None, + ) -> str: + flag_collapse = ( + self.cfg.collapse_transients + if collapse_transients is None + else bool(collapse_transients) + ) + flag_classify = ( + self.cfg.classify_types if classify_types is None else bool(classify_types) + ) + + total_raw, lines = search_keyword_in_files( + self.cfg.results_path, self.cfg.keyword + ) + + out_lines: list[str] = [] + if lines: + out_lines.append("# Raw matches") + out_lines.extend(lines) + out_lines.append("") + + if flag_classify: + n_groups, n_transient, n_impulsive, trans_reps, imp_reps = ( + self._classify_groups(lines) + ) + summary_line = ( + f"Total '{self.cfg.keyword}' events (grouped): {n_groups}; raw hits: {total_raw}" + f"(transient groups ≥2: {n_transient}, impulsive singles: {n_impulsive})" + ) + if trans_reps: + out_lines.append("# Transient representatives (one per group, size ≥2)") + out_lines.extend(trans_reps) + out_lines.append("") + if imp_reps: + out_lines.append("# Impulsive representatives (group size = 1)") + out_lines.extend(imp_reps) + out_lines.append("") + + elif flag_collapse: + collapsed_total, reps = self._group_transients(lines) + summary_line = ( + f"Total detected '{self.cfg.keyword}' (collapsed transients): {collapsed_total} " + f"(raw hits: {total_raw})" + ) + if reps: + out_lines.append("# Collapsed representatives (one per group)") + out_lines.extend(reps) + out_lines.append("") + else: + 
summary_line = f"Total detected '{self.cfg.keyword}': {total_raw}" + + out_lines.append(summary_line) with self.cfg.summary_file.open("w") as fh: - if lines: - fh.write("\n".join(lines) + "\n") - fh.write(summary_line + "\n") + fh.write("\n".join(out_lines) + "\n") - result_msg = summary_line + "\n" + f"Text file saved at {self.cfg.summary_file}" - return result_msg + return summary_line + "\n" + f"Text file saved at {self.cfg.summary_file}" diff --git a/tests/test_count/test_counter.py b/tests/test_count/test_counter.py index 7bb3e5a..08e2f65 100644 --- a/tests/test_count/test_counter.py +++ b/tests/test_count/test_counter.py @@ -15,27 +15,38 @@ def _mk_txt(where: Path, name: str, text: str) -> None: class TestAnomalyCounter: """Happy-path and edge-case checks for the counter module.""" - def test_run_counts_and_writes(self, tmp_path: Path): - """Three matching lines → summary says 3 and file holds 4 lines.""" - out_root = tmp_path / "out" - out_root.mkdir() - - # keyword must be *last* token in the matching lines - _mk_txt(out_root, "a.txt", "one anomaly\nno hit here\nanother anomaly\n") - _mk_txt(out_root, "b.txt", "still no\nthird anomaly\n") - - cfg = CounterConfig(results_path=out_root, keyword="anomaly") - summary = AnomalyCounter(cfg).run() - - expected_msg = ( - f"Total detected 'anomaly': 3\n" f"Text file saved at {cfg.summary_file}" - ) - assert summary == expected_msg - text_lines = cfg.summary_file.read_text().splitlines() - # 3 matches + summary line - assert len(text_lines) == 4 - assert text_lines[-1] == "Total detected 'anomaly': 3" +def test_run_counts_and_writes(tmp_path: Path): + """Three matching lines → summary says 3 and file layout includes header, matches, blank, summary.""" + out_root = tmp_path / "out" + out_root.mkdir() + + # keyword must be *last* token in the matching lines + _mk_txt(out_root, "a.txt", "one anomaly\nno hit here\nanother anomaly\n") + _mk_txt(out_root, "b.txt", "still no\nthird anomaly\n") + + cfg = 
CounterConfig(results_path=out_root, keyword="anomaly") + summary = AnomalyCounter(cfg).run() + + expected_msg = f"Total detected 'anomaly': 3\nText file saved at {cfg.summary_file}" + assert summary == expected_msg + + text_lines = cfg.summary_file.read_text().splitlines() + + # New file structure: + # 0: "# Raw matches" + # 1..3: three matching lines + # 4: "" (blank) + # 5: summary line + assert len(text_lines) == 6 + assert text_lines[0] == "# Raw matches" + assert text_lines[4] == "" + assert text_lines[-1] == "Total detected 'anomaly': 3" + + # Be robust to ordering of matches (depends on search implementation) + matches = [ln for ln in text_lines if ln.endswith("anomaly")] + assert len(matches) == 3 + assert set(matches) == {"one anomaly", "another anomaly", "third anomaly"} def test_custom_keyword(self, tmp_path: Path): """Case-sensitive search for 'foo' finds exactly two matches.""" From b804e0d4c9188a304ae319827ff5b8c2f0a67a49 Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Mon, 13 Oct 2025 14:35:40 -0600 Subject: [PATCH 2/5] update docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a48870c..f28a62c 100644 --- a/README.md +++ b/README.md @@ -172,7 +172,7 @@ AnomalyDetector(cfg).run() AnomalyDetector(cfg).run_parallel() # count number of detected anomalies -cfg = CounterConfig(keyword="anomaly") +cfg = CounterConfig(keyword="anomaly", classify_types=True, max_gap_seconds=0) anomalies = AnomalyCounter(cfg).run() print(anomalies) # prints info on number of anomalies and path to them ``` From 2bb7ad28c973587535118fec6cd000f1dd1cad89 Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Wed, 3 Dec 2025 13:10:23 -0700 Subject: [PATCH 3/5] try fix CI --- .DS_Store | Bin 6148 -> 6148 bytes .github/workflows/run_test.yml | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.DS_Store b/.DS_Store index 1fb2b804c8fda766e33fc7cd47f99c1eec014674..112055a28456e9f3e6436710d3890440661b3c1c 100644 GIT 
binary patch delta 155 zcmZoMXfc=|#>B!ku~2NHo+2a5#(>?7ivyUM7+EI&Wztz}!Ne%a!jQs{$WX$Nm{eX| zkd%|3#K6F?W3mfVp}ItMwV|1jj)JLit&T#qp`p1ckZo#OTg%BIs;qAv6rY`wo0s1; x`7V<@Z#PgE+@#H#%-B)qu~2NHo+2aD#(>?7lMO^zCb4QRwqRl0tibl1abv>^rp@de{2V}a an*}+(Gf(ChapYhC0!9V~mdz0&YnTDSK@q6{ diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index a036a0f..c0782f2 100644 --- a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -22,7 +22,7 @@ jobs: - name: Install project + test deps run: | - python -m pip install -U pip + python -m pip install --upgrade pip python -m pip install -e .[test] From 7f667a3379a11c2a56bc8e7f16f2192270628e33 Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Wed, 3 Dec 2025 13:30:45 -0700 Subject: [PATCH 4/5] try fix Ubunto dir file system iterate --- das_anomaly/detect/detector.py | 59 +++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/das_anomaly/detect/detector.py b/das_anomaly/detect/detector.py index b44cbdf..8bc7a6d 100644 --- a/das_anomaly/detect/detector.py +++ b/das_anomaly/detect/detector.py @@ -101,23 +101,50 @@ def __init__(self, cfg: DetectConfig): # -------------------------------------------------------------- # def run(self) -> None: """Score every PSD PNG and log / copy anomalies with single processor.""" - root_pngs = list(self.cfg.psd_path.glob("*.png")) - - # enumerate only directories (deterministic order) - subdirs = sorted([p for p in self.cfg.psd_path.iterdir() if p.is_dir()]) - - for i, folder in enumerate(self.cfg.psd_path.iterdir()): - if not folder.is_dir(): - continue - # If there are no subdirs, process root-level PNGs once - if not subdirs: - spectra = root_pngs - - spectra = ( - sorted(root_pngs + list(folder.glob("*.png"))) - if i == 0 - else sorted(folder.glob("*.png")) + psd_root = self.cfg.psd_path + + # Root-level PNGs (e.g., root_0.png, root_1.png, ...) 
+ root_pngs = sorted(psd_root.glob("*.png")) + + # Subdirectories under psd_root (e.g., rank_0, rank_1, ...) + subdirs = sorted([p for p in psd_root.iterdir() if p.is_dir()]) + + # Case 1: no subdirs -> only root-level spectra + if not subdirs: + if not root_pngs: + # nothing to do + return + + spectra = root_pngs + out_file = ( + self.cfg.results_path + / f"{psd_root.name}_output_model_{self.cfg.size}_anomaly.txt" ) + with out_file.open("w") as fh: + for j, img_path in enumerate(spectra): + flag = check_if_anomaly( + encoder_model=self.encoder, + size=self.cfg.size, + img_path=img_path, + kde=self.kde, + density_threshold=self.cfg.density_threshold, + mse_threshold=self.cfg.mse_threshold, + ) + print(f"Line {j}, image {img_path}: {flag}", file=fh) + + if flag.endswith("anomaly"): + shutil.copy(img_path, self.dest_dir) + return + + # Case 2: at least one subdir exists + # - first subdir: process root PNGs + its PNGs + # - remaining subdirs: process their PNGs only + for i, folder in enumerate(subdirs): + if i == 0: + spectra = sorted(root_pngs + list(folder.glob("*.png"))) + else: + spectra = sorted(folder.glob("*.png")) + out_file = ( self.cfg.results_path / f"{folder.name}_output_model_{self.cfg.size}_anomaly.txt" From 0f77c7df6232771ee696070f1dc1318d7f48e4c2 Mon Sep 17 00:00:00 2001 From: ahmadtourei Date: Wed, 3 Dec 2025 13:37:49 -0700 Subject: [PATCH 5/5] try fix Windows interactive plotting --- das_anomaly/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/das_anomaly/utils.py b/das_anomaly/utils.py index b7bab0e..6025019 100644 --- a/das_anomaly/utils.py +++ b/das_anomaly/utils.py @@ -7,6 +7,9 @@ import os import string +import matplotlib + +matplotlib.use("Agg") import matplotlib.pyplot as plt import numpy as np import scipy.fftpack as ft