sot · taldcroft · Jul 11, 2025 · Jul 12, 2025 · Jul 12, 2025 · Sep 11, 2025
diff --git a/proseco/acq.py b/proseco/acq.py
@@ -1273,6 +1273,53 @@ def get_spoiler_stars(stars, acq, box_size):
     return spoilers
 
 
+def split_labeled_regions(dc_labeled, max_size=8):
+    """
+    Split labeled regions into smaller subregions.
+
+    This is used to ensure that the labeled regions corresponding to "search hits" are
+    not too large, which can happen if the dark current is mostly above the threshold.
+    The algorithm splits each labeled region into smaller subregions of size at most
+    `max_size` x `max_size` pixels. This ensures that a large region of high background
+    does not turn into just one isolated search hit.
+
+    Parameters
+    ----------
+    dc_labeled : np.ndarray
+        Labeled array where each contiguous region has a unique label.
+    max_size : int, optional
+        Maximum size of subregions to create, by default 8.
+
+    Returns
+    -------
+    dc_labeled : np.ndarray
+        New labeled array with subregions of size at most `max_size`.
+    n_regions : int
+        Number of labeled regions found.
+    """
+    # Find slices for each labeled region
+    slices = ndimage.find_objects(dc_labeled)
+    new_label = 1
+    dc_labeled_new = np.zeros_like(dc_labeled)
+    for i, slc in enumerate(slices):
+        region = dc_labeled[slc] == (i + 1)
+        region_shape = region.shape
+
+        # Split region into 8x8 subregions if needed
+        for r_start in range(0, region_shape[0], max_size):
+            for c_start in range(0, region_shape[1], max_size):
+                r_end = min(r_start + max_size, region_shape[0])
+                c_end = min(c_start + max_size, region_shape[1])
+                subregion = region[r_start:r_end, c_start:c_end]
+                if np.any(subregion):
+                    mask = np.zeros_like(region, dtype=bool)
+                    mask[r_start:r_end, c_start:c_end] = subregion
+                    dc_labeled_new[slc][mask] = new_label
+                    new_label += 1
+
+    return dc_labeled_new, new_label - 1
+
+
 def get_imposter_stars(
     dark,
     star_row,
@@ -1296,13 +1343,15 @@ def get_imposter_stars(
     :param star_col: col of acq star (float)
     :param thresh: PEA search hit threshold for a 2x2 block (e-/sec)
     :param maxmag: Max mag (alternate way to specify search hit ``thresh``)
-    :param box_size: box size (arcsec)
+    :param box_size: float or ACABox box size (arcsec)
     :param bgd: assumed flat background (float, e-/sec)
     :param mag_limit: Max mag for imposter (using 6x6 readout)
     :param test: hook for convenience in algorithm testing
 
     :returns: numpy structured array of imposter stars
     """
+    box_size = ACABox(box_size)
+
     # Convert row/col to array index coords unless testing.
     rc_off = 0 if test else 512
     acq_row = int(star_row + rc_off)
@@ -1346,13 +1395,20 @@ def get_imposter_stars(
     # contiguous regions above ``thresh`` labeled with a unique index.
     # This is a one-line way of doing the PEA merging process, roughly.
     dc_labeled, n_hits = ndimage.label(dc2x2 > thresh)
-    if test:
-        print(dc_labeled)
 
     # If no hits just return empty list
     if n_hits == 0:
         return []
 
+    if ACQ.search_hit_max_size is not None:
+        dc_labeled, n_hits = split_labeled_regions(
+            dc_labeled, max_size=ACQ.search_hit_max_size
+        )
+    if test:
+        # Print the entire dc_labeled array without clipping
+        with np.printoptions(threshold=100000, linewidth=200):
+            print(dc_labeled)
+
     outs = []
     for idx in range(n_hits):
         # Get row and col index vals for each merged region of search hits
@@ -1400,6 +1456,7 @@ def get_imposter_stars(
         yang, zang = pixels_to_yagzag(row, col, allow_bad=True)
 
         out = (
+            len(vals),
             row,
             col,
             row - star_row,
@@ -1417,6 +1474,7 @@ def get_imposter_stars(
 
     if len(outs) > 0:
         dtype = [
+            ("n_pix", "<i8"),  # Number of pixels in the search hit
             ("row", "<f8"),
             ("col", "<f8"),
             ("d_row", "<f8"),

diff --git a/proseco/characteristics_acq.py b/proseco/characteristics_acq.py
@@ -100,3 +100,8 @@ def _get_fid_acq_stages():
 
 # Index template file name
 index_template_file = "index_template_acq.html"
+
+# Search hit max size in pixels.  This is used to limit the size of search hits
+# that are used in the acquisition algorithm. See `split_labeled_regions` for
+# details. Set to None to disable the size limit.
+search_hit_max_size = 8
diff --git a/proseco/tests/test_acq.py b/proseco/tests/test_acq.py
@@ -23,6 +23,7 @@
     get_image_props,
     get_imposter_stars,
     get_p_man_err,
+    split_labeled_regions,
 )
 from ..catalog import get_aca_catalog
 from ..core import ACABox, StarsTable
@@ -1375,3 +1376,65 @@ def test_acq_include_ids_all_halfws_full_catalog_with_spoiler():
     assert np.all(aca.acqs["id"] == ids)
     assert np.allclose(aca.acqs["mag"], mags)
     assert np.all(aca.acqs["halfw"] == halfws)
+
+
+img_input = np.array(
+    [
+        [4, 4, 0, 1, 1, 0],
+        [0, 0, 0, 1, 1, 0],
+        [1, 1, 1, 1, 1, 0],
+        [0, 2, 2, 3, 1, 0],
+        [0, 2, 2, 0, 1, 0],
+        [0, 0, 0, 0, 0, 0],
+    ]
+)
+
+test_split_labeled_regions_cases = [
+    {
+        "img_exp": [
+            [4, 4, 0, 1, 1, 0],
+            [0, 0, 0, 1, 1, 0],
+            [1, 1, 1, 1, 1, 0],
+            [0, 2, 2, 3, 1, 0],
+            [0, 2, 2, 0, 1, 0],
+            [0, 0, 0, 0, 0, 0],
+        ],
+        "max_size": 5,
+        "n_region": 4,
+    },
+    {
+        "img_exp": [
+            [6, 6, 0, 1, 2, 0],
+            [0, 0, 0, 1, 2, 0],
+            [1, 1, 1, 1, 2, 0],
+            [0, 4, 4, 5, 2, 0],
+            [0, 4, 4, 0, 3, 0],
+            [0, 0, 0, 0, 0, 0],
+        ],
+        "max_size": 4,
+        "n_region": 6,
+    },
+    {
+        "img_exp": [
+            [9, 9, 0, 1, 2, 0],
+            [0, 0, 0, 1, 2, 0],
+            [3, 3, 4, 4, 5, 0],
+            [0, 7, 7, 8, 5, 0],
+            [0, 7, 7, 0, 6, 0],
+            [0, 0, 0, 0, 0, 0],
+        ],
+        "max_size": 2,
+        "n_region": 9,
+    },
+]
+
+
+@pytest.mark.parametrize("test_case", test_split_labeled_regions_cases)
+def test_split_labeled_regions(test_case):
+    img_exp = test_case["img_exp"]
+    max_size = test_case["max_size"]
+    n_region = test_case["n_region"]
+
+    img_split, n_regions = split_labeled_regions(img_input, max_size=max_size)
+    assert n_regions == n_region
+    assert np.all(img_split == img_exp)
diff --git a/ruff.toml b/ruff.toml
@@ -1,9 +1,7 @@
 extend = "ruff-base.toml"
 
 # These are files to exclude for this project.
-extend-exclude = [
-  "**/*.ipynb",  # commonly not ruff-compliant
-]
+# extend-exclude = []
 
 # These are rules that commonly cause many ruff warnings. Code will be improved by
 # incrementally fixing code to adhere to these rules, but for practical purposes they