diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 6b0342b..cd9ef43 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Install the cosign tool except on PR # https://github.com/sigstore/cosign-installer @@ -39,17 +39,19 @@ jobs: if: github.event_name != 'pull_request' uses: sigstore/cosign-installer@v3.1.1 with: - cosign-release: 'v2.2.1' + cosign-release: 'v2.2.4' - # Workaround: https://github.com/docker/build-push-action/issues/461 - - name: Setup Docker buildx - uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf + # Set up BuildKit Docker container builder to be able to build + # multi-platform images and export cache + # https://github.com/docker/setup-buildx-action + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0 # Login against a Docker registry except on PR # https://github.com/docker/login-action - name: Log into registry ${{ env.REGISTRY }} if: github.event_name != 'pull_request' - uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c + uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0 with: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} @@ -59,7 +61,7 @@ jobs: # https://github.com/docker/metadata-action - name: Extract Docker metadata id: meta - uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 + uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} @@ -67,16 +69,18 @@ jobs: run: echo "TAG=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - + uses: docker/setup-qemu-action@v3 + # Build and push Docker image with Buildx (don't push on PR) # https://github.com/docker/build-push-action - name: Build and push Docker image id: build-and-push - uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a + uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0 with: context: . - platforms: linux/amd64,linux/arm64 push: ${{ github.event_name != 'pull_request' }} + platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..3b0cb0e --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,69 @@ +# This workflow will upload a Python Package to PyPI when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + release-build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - name: Build release distributions + run: | + # NOTE: put your own distribution build steps here. + python -m pip install build + python -m build + + - name: Upload distributions + uses: actions/upload-artifact@v4 + with: + name: release-dists + path: dist/ + + pypi-publish: + runs-on: ubuntu-latest + needs: + - release-build + permissions: + # IMPORTANT: this permission is mandatory for trusted publishing + id-token: write + + # Dedicated environments with protections for publishing are strongly recommended. + # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules + environment: + name: pypi + url: https://pypi.org/p/backsub/ + # + # ALTERNATIVE: if your GitHub Release name is the PyPI project version string + # ALTERNATIVE: exactly, uncomment the following line instead: + # url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }} + + steps: + - name: Retrieve release distributions + uses: actions/download-artifact@v4 + with: + name: release-dists + path: dist/ + + - name: Publish release distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist/ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..85f8d50 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +backsub/__pycache__ +dist/ +__pycache__ +.DS_Store +backsub/testing.ipynb +build/ +backsub.egg-info/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 5b25f54..060b348 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,15 +3,35 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v0.5.0 - [2025.11.##] + +Rework of Backsub to not have Palom as a dependency reducing the environment size and making it lightweight, and reducing the output file size, while keeping the time and memory usage efficiency. + +### `Added` +- `compression` parameter +- two RAM profiles: (1) default, uses moderate RAM. (2) Uses approximately half of the default RAM at the cost of a slight loss in precision of the calculation of the downsized dimensions of the pyramidal output image. This means the dimensions of the pyramidal level will differ between profile 1 and 2. The high-resolution level is not affected by this. +- organizes the tool in five scripts: (1) CLI, (2) ome-schema structure, (3) ome-schema writer, (4) background subtraction and writing of output image and (5) extraction of metadata from Lunaphore COMET images to create the marker table. +- logger has been re-designed. +- restructured README +- `tile-size` parameter now defaults to 256 instead of 1024 +- support for different data types - primarily np.uint8 in addition to np.uint16 + +### `Fixed` +- output image file-size is reduced by applying lossless compression ("LZW" by default) + +### `Removed` +- Palom and OpenCV as dependencies +- old scripts, version history should be checked instead. +- `chunksize` argument - chunking is done now only by the default dask.array behaviour + ## v0.4.1 - [2023.11.21] -Complete rework of Backsub to include Palom's pyramid writer (https://github.com/labsyspharm/palom). -Added dask array chunking and delayed execution for subtraction that happenes while the output pyramidal `ome.tif` is being created. -Added `CHANGELOG.md`. +The script has been rewritten to perform channel subtraction in a RAM-efficient manner - updating is highly recommended. If the output file is much bigger than expected, adjust the `--tile-size` parameter to a smaller value (e.g `512`). Changing the `--chunk-size` parameter may affect performance (lower values increase execution time, higher values increase RAM usage). ### `Added` -- `--chunk-size` parameter +- `--chunk-size` parameter for dask array chunking and delayed execution for subtraction that happens while the output pyramidal OME-TIFF is being created. - Palom's pyramid writer +- `CHANGELOG.md` ### `Fixed` - Fixed issue with RAM inefficiency - reworked Backsub. @@ -19,5 +39,6 @@ Added `CHANGELOG.md`. ### `Removed` - `--pyramid` tag introduced in v0.3.4, for smaller images, a smaller tile size should be specified now. +## Versions v0.2.0 and older: -I did not keep a changelog before version v0.4.1. \ No newline at end of file +The `markers.csv` file which gives details about the channels needs to contain the following columns: "Filter", "background" and "exposure". An exemplary [markers_old.csv](https://github.com/SchapiroLabor/Background_subtraction/files/9549686/markers.csv) file is given. The "Filter" column should specify the Filter used when acquiring images. If different stains are aquired with the same filter, the *exact same value* needs to be written (including background) as it is used for determining which background channel should be subtracted. The "background" column should contain logical `TRUE` values for channels which represent autofluorescence. The "exposure" column should contain the exposure time used for channel acquisition, and the measure unit should be consistent across the column. Exposure time is used for scaling the value of the background to be comparable to the processed channel. Usage of these versions is strongly disencouraged. \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 91574fc..f1dbacd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,25 @@ -FROM continuumio/miniconda3 - -COPY environment.yml . -RUN apt-get update -qq && apt-get install -y \ - build-essential \ - ffmpeg \ - libsm6 \ - libxext6 - -RUN conda env create -f environment.yml -ENV PATH="/opt/conda/envs/backsub/bin:$PATH" -WORKDIR /background_subtraction -COPY . . \ No newline at end of file +FROM mambaorg/micromamba:1.5.10-noble + +# Copy conda environment file +COPY --chown=$MAMBA_USER:$MAMBA_USER ./environment.yml /tmp/conda.yml + +# Install environment +RUN micromamba install -y -n base -f /tmp/conda.yml \ + && micromamba install -y -n base conda-forge::procps-ng \ + && micromamba env export --name base --explicit > environment.lock \ + && echo ">> CONDA_LOCK_START" \ + && cat environment.lock \ + && echo "<< CONDA_LOCK_END" \ + && micromamba clean -a -y + +# Switch to root to copy everything +USER root + +# Ensure micromamba binaries are in PATH +ENV PATH="$MAMBA_ROOT_PREFIX/bin:$PATH" + +# Copy the rest of the current directory into /app inside the container +WORKDIR /app +COPY . . + +RUN micromamba run -n base pip install . diff --git a/README.md b/README.md index 03b7f18..0e4fdf7 100644 --- a/README.md +++ b/README.md @@ -1,65 +1,158 @@ -# Background_subtraction +# Backsub - pixel-by-pixel channel subtraction tool for multiplexed immunofluorescence data -Pixel-by-pixel channel subtraction scaled by exposure times, primarily developed for images produced by the COMET platform and to work within the MCMICRO pipeline. Main usecase is autuofluorescence subtraction for multichannel and multicycle images for visualization of images from tissues with high autofluroescence (FFPE), improved segmentation, and quantification (if the previous two usecases aren't necessary, downstream subtraction of autofluorescent signal is encouraged as the script is memory inefficent). +Backsub performs pixel-by-pixel background subtraction between marker and background channels scaled by their respective exposure times. The outputs are saved as pyramidal OME-TIFF files. It was originally developed for data produced by the Lunaphore COMET platform and is fully compatible with the [MCMICRO](https://mcmicro.org) pipeline. + + +Example of pixel-wise autofluorescence subtraction with Backsub: +

+ Background subtraction example +

## Introduction -If there are background (autofluorescence) channels present in a `.tif` image, background subtraction should be performed so as not to skew the quantification counts of markers. The most precise way of subtracting background would be on a pixel-to-pixel basis. An alternative would be on a cell basis by just subtracting the background measurements from the marker measurements for each cell, however, for visual inspection of images, as well as future use of images as figures in published work, it is preferred to use this. +In multiplexed immunofluorescence images, autofluorescence and background signals can cause improper cell segmentation, and can affect downstream intensity quantification which is why, if possible, they should be subtracted from raw channel intensities. The most precise way of subtracting background would be on a pixel-to-pixel basis. An alternative would be to subtract the background measurements from the marker measurements for each cell after quantification, however, for visual inspection of images, segmentation, and data presentation, it is preferred to use the corrected values. -Background subtraction is performed using the following formula: +The primary use case is autofluorescence subtraction for multichannel and multicycle microscopy images to improve: +* image visualization of tissues with strong autofluorescence +* segmentation accuracy +* quantification quality (if the previous two usecases are not necessary, downstream subtraction of autofluorescence signal is encouraged instead) -Marker*corrected* = Marker*raw* - Background / Exposure*Background* * Exposure*Marker* +Background subtraction is performed using the following formula: +$Marker_{corrected} = Marker_{raw} - Background \times \frac{Exposure_{Marker}}{Exposure_{Background}}$ -## Usage +## Installation -The `markers.csv` file which gives details about the channels needs to contain the following columns: "marker_name", "background" and "exposure". An exemplary [markers.csv](https://github.com/SchapiroLabor/Background_subtraction/blob/main/example/markers.csv) file is given. The "marker_name" column should indicate the marker for the acquired channel and all values should be unique. The "background" column should indicate the marker name of the channel which needs to be subtracted. This value must match the "marker_name" value of the background channel. The "exposure" column should contain the exposure time used for channel acquisition, and the measure unit should be consistent across the column. Exposure time is used for scaling the value of the background to be comparable to the processed channel. The "remove" column should contain logical `TRUE` values for channels which should be exluded in the output image. +Backsub can be run either in a preconfigured Docker container or in a local conda environment. -### Versions v0.4.1 and newer: -The script has been rewritten to perform channel subtraction in a RAM-efficient manner - updating is highly recommended. If the output file is much bigger than expected, adjust the `--tile-size` parameter to a smaller value (e.g `512`). Changing the `--chunk-size` parameter may affect performance (lower values increase execution time, higher values increase RAM usage). +### Option 1: Docker +Pull the latest container from the GitHub Container Registry: +``` +docker pull ghcr.io/schapirolabor/background_subtraction:latest +``` +You can then run Backsub directly, mounting your input and output directories: +``` +docker run --rm -v $(pwd):/data ghcr.io/schapirolabor/background_subtraction:latest \ + python background_sub.py \ + -r /data/input_image.tif \ + -o /data/corrected_image.ome.tif \ + -m /data/markers.csv \ + -mo /data/markers_corrected.csv +``` +Note that all required dependencies are already included inside the container. -### Versions v0.2.0 and older: -The `markers.csv` file which gives details about the channels needs to contain the following columns: "Filter", "background" and "exposure". An exemplary [markers_old.csv](https://github.com/SchapiroLabor/Background_subtraction/files/9549686/markers.csv) file is given. The "Filter" column should specify the Filter used when acquiring images. If different stains are aquired with the same filter, the *exact same value* needs to be written (including background) as it is used for determining which background channel should be subtracted. The "background" column should contain logical `TRUE` values for channels which represent autofluorescence. The "exposure" column should contain the exposure time used for channel acquisition, and the measure unit should be consistent across the column. Exposure time is used for scaling the value of the background to be comparable to the processed channel. +If you want to build the container yourself, clone the repository first, then build it from the provided Dockerfile: +``` +git clone https://github.com/SchapiroLabor/Background_subtraction.git +cd Background_subtraction +docker build -t background_subtraction:latest . +``` +### Option 2: Conda +Clone the repository and create the Conda environment: +``` +git clone https://github.com/SchapiroLabor/Background_subtraction.git +cd Background_subtraction +conda env create -f environment.yml +conda activate backsub_env +``` +You can now run Backsub locally (note that you need to point to the tool's script): +``` +python backsub/background_sub.py -h +``` -### CLI +## Execution and usage -The script requires four inputs: -* the path to the starting image given with `-r` or `--root` -* the path to the output image given with `-o` or `--output` -* the path to the `markers.csv` file given with `-m` or `--markers` -* the path to the markers output file given with `-mo` or `--markerout` -Optional inputs: -* `--pixel-size` to specify the pixel size of the input image (default: `1.0`), if not specified, the pixel size will be read from the metadata of the input image. -* `--version` to print version and exit (added in v0.3.4) -* `--tile-size` to specify the tile size for the pyramidal output image (default: `1024`) (added in v0.3.4) -* `--chunk-size` to specify chunk size for delayed calculation execution (default: `5000`) - lower values result in higher execution time, higher values in higher RAM usage +### Inputs +A `TIFF` or `OME-TIFF` file containing multiplexed immunofluorescence data. -### Output +A `markers.csv` file should be provided to describe the channels of the image. Needs to contain the following columns: -The output image file will be a pyramidal `ome.tif` file containing the processed channels. The channels tagged for removal will be excluded from the final image. -The output markers file will be a `csv` file containing the following columns: "marker_name", "background", "exposure". The "marker_name" column will contain the marker names of the processed channels. The "background" column will contain the marker names of the channels used for subtraction. The "exposure" column will contain the exposure times of the processed channels. +| Column | Description | Required | +|------------- |------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |---------- | +| marker_name | Contains the channel names, all values **must** be unique. | yes | +| background | Specifies the channel that should be subtracted from the specified channel. The `background` value, if present, **must** match the `marker_name` value of the background channel. If no subtraction is necessary, the field can be left empty. | yes | +| exposure | Contains the exposure time used for channel acquisition in ms. | yes | +| remove | Optional column that allows the user to exclude certain channels from the output file by setting that channel's `remove` value to `TRUE`. | no | -### Docker usage +An exemplary [markers.csv](https://github.com/SchapiroLabor/Background_subtraction/blob/main/example/markers.csv) file is provided. -If you want to run the background subtraction directly from a pre-configured container with all the required packages, you can either build the docker container yourself or pull it from the Github container registry. +### Command Line Interface -To build the container run: +| Argument | Long form | Description | Specification | Default | Required | +|---------- |-------------------- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |-------------------------------------------------------------- |--------- |---------- | +| -in | --input | File path to the input image file. | string, ends with `.tif`, `.tiff`, `.ome.tif` or `.ome.tiff` | | yes | +| -o | --output | File path where the output pyramidal OME-TIFF will be saved. | string, ends with `.ome.tif` or `.ome.tiff` | | yes | +| -m | --markers | File path to the markers (CSV) file containing the list of marker names and their respective background channels. | string, ends with `.csv` | | yes | +| -mo | --marker-output | File path where the output marker (CSV) file matching the output image will be saved. | string, ends with `.csv` | | yes | +| -mpp | --pixel-size | Pixel size provided in microns (microns per pixel). If not provided, image metadata will be checked. If that is not successful, the value will be set to 1. | float | None | no | +| -sr | --save-ram | Optional flag to approximately cut RAM usage in half. Note that the dimensions of the reduced resolution levels (sub-levels) of the output pyramidal image will slightly differ whether or not the argument is used. | boolean flag | false | no | +| -comp | --compression | The output pyramidal OME-TIFF will be compressed using the specified compression. Set to "none" for no compression. | string, either "lzw", "zlib", or "none" | "lzw" | no | +| -ts | --tile-size | Tile size used for writing pyramidal outputs. Note that the file size is smaller for smaller tile size values. | integer, multiple of 16 | 256 | no | +| -dsf | --downscale-factor | Downscale factor for pyramid layer creation. This value will only be used if the input image is NOT pyramidal. If the input image is pyramidal, the number of levels in the output image will be the same as in the input so the downscale factor won't be applied. | integer, larger than 1 | 2 | no | +| -v | --version | Prints Backsub version. | | | | +Example of a full command (note to provide full paths where applicable): ``` -git clone https://github.com/SchapiroLabor/Background_subtraction.git -docker build -t background_subtraction:latest . -docker run background_subtraction:latest python background_sub.py +python Background_subtraction/backsub/background_sub.py \ + --input /data/input_image.tif \ + --output /data/corrected_image.ome.tif \ + --markers /data/markers.csv \ + --marker-output /data/markers_corrected.csv \ + --pixel-size 0.65 \ + --tile-size 256 \ + --downscale-factor 2 \ + --compression zlib ``` -To pull the container from the Github container registry (ghcr.io): +### Outputs -``` -## Login to ghcr.io -docker login ghcr.io +The output image file will be a pyramidal `OME-TIFF` file containing the processed channels. The channels tagged for removal will be excluded from the final image. -## Pull container -docker pull ghcr.io/schapirolabor/background_subtraction:latest +The output markers file will be a `CSV` file containing the following columns: "marker_name", "background", "exposure". The "marker_name" column will contain the marker names of the processed channels. The "background" column will contain the marker names of the channels used for subtraction. The "exposure" column will contain the exposure times of the processed channels. + +## Features + +* Pixel-wise channel subtraction scaled by exposure time. +* Autofluroescence correction for multiplexed immunofluroescence images. +* Pyramidal OME-TIFF output compatible with the MCMICRO pipeline. +* Optional image compression to not bloat data size of large projects. +* Low-memory mode for local processing of large datasets. +* Automatic metadata extraction for Lunaphore COMET data. + +## Contributing + +Contributions are welcome! If you would like to contribute, please: +1. Fork the repository +2. Create a feature branch: +``` +git checkout -b feature/my-feature ``` +3. Commit your changes and open a pull request + +For questions of issues, please open a GitHub issue. + +## Contributors + +Author and maintainer: +* [Krešimir Beštak](@kbestak) + +Contributors: +* [Victor Perez](@VictorDidier) +* [Florian Wünnemann](@FloWuenne). + +## Changelog + +See the [CHANGELOG](https://github.com/SchapiroLabor/Background_subtraction/blob/main/CHANGELOG.md) file for deatils about new features, bug fixes, and version history. + +## License + +This project is licensed under the terms of the [MIT License](https://github.com/SchapiroLabor/Background_subtraction/blob/main/LICENSE). + +## Citation + +If you use Backsub in your work, please cite it as: + +> Bestak, K., Perez, V., & Wuennemann, F. (2025). Backsub: pixel-by-pixel channel subtraction tool for multiplexed immunofluorescence data. diff --git a/background_sub.py b/background_sub.py deleted file mode 100644 index 31a5b93..0000000 --- a/background_sub.py +++ /dev/null @@ -1,431 +0,0 @@ -import argparse -import pathlib - -import ome_types -import palom.pyramid -import palom.reader -import pandas as pd -import numpy as np -import copy -import dask.array as da -import numexpr as ne -import math -import cv2 -import tifffile -import tqdm -import zarr -from loguru import logger -from argparse import ArgumentParser as AP -from os.path import abspath -import time -### Using the palom pyramidal writer by Yu-An Chen: https://github.com/labsyspharm/palom/blob/main/palom/pyramid.py -### Adapted for this use case - only one image can be processed: - -class PyramidSetting: - - def __init__( - self, - downscale_factor=2, - tile_size=1024, - max_pyramid_img_size=1024 - ): - self.downscale_factor = downscale_factor - self.tile_size = tile_size - self.max_pyramid_img_size = max_pyramid_img_size - - def tile_shapes(self, base_shape): - shapes = np.array(self.pyramid_shapes(base_shape)) - n_rows_n_cols = np.ceil(shapes / self.tile_size) - tile_shapes = np.ceil(shapes / n_rows_n_cols / 16) * 16 - return [tuple(map(int, s)) for s in tile_shapes] - - def pyramid_shapes(self, base_shape): - num_levels = self.num_levels(base_shape) - factors = self.downscale_factor ** np.arange(num_levels) - shapes = np.ceil(np.array(base_shape) / factors[:,None]) - return [tuple(map(int, s)) for s in shapes] - - def num_levels(self, base_shape): - factor = max(base_shape) / self.max_pyramid_img_size - - return math.ceil(math.log(max(factor, 1), self.downscale_factor))+1 - -def format_channel_names(num_channels_each_mosaic, channel_names): - ''' - format_channel_names( - [1, 2, 3, 4, 5], ['x', 'x', ['x'], ['x', 'x']] - ) - >>> [ - 'x_1', - 'x_2', - 'x_3', - 'x_4', - 'x_5', - 'x_6', - 'Mosaic 4_1', - 'Mosaic 4_2', - 'Mosaic 4_3', - 'Mosaic 4_4' - ] - ''' - matched_channel_names = [] - for idx, (n, c) in enumerate( zip(channel_names, num_channels_each_mosaic*num_channels_each_mosaic[0]) ): - c=1 - nl = n - if type(n) == str: - nl = [n] - if len(nl) == 1: - nl = nl - matched_channel_names.append(nl) - return make_unique_str( - [n for l in matched_channel_names for n in l] - ) - -def make_unique_str(str_list): - if len(set(str_list)) == len(str_list): - return str_list - else: - max_length = max([len(s) for s in str_list]) - str_np = np.array(str_list, dtype=np.dtype(('U', max_length+10))) - unique, counts = np.unique(str_np, return_counts=True) - has_duplicate = unique[counts > 1] - for n in has_duplicate: - suffixes = [ - f"_{i}" - for i in range(1, (str_np == n).sum()+1) - ] - str_np[str_np == n] = np.char.add(n, suffixes) - return make_unique_str(list(str_np)) - -def normalize_mosaics(mosaics): - dtypes = set(m.dtype for m in mosaics) - if any([np.issubdtype(d, np.floating) for d in dtypes]): - max_dtype = np.dtype(np.float32) - else: - max_dtype = max(dtypes) - normalized = [] - for m in mosaics: - assert m.ndim == 2 or m.ndim == 3 - if m.ndim == 2: - m = m[np.newaxis, :] - normalized.append(m.astype(max_dtype, copy=False)) - return normalized - - -def write_pyramid( - mosaics, - output_path, - pixel_size=1, - channel_names=None, - verbose=True, - downscale_factor=4, - compression=None, - is_mask=False, - tile_size=None, - save_RAM=False, - kwargs_tifffile=None -): - mosaics = normalize_mosaics(mosaics) - ref_m = mosaics[0] - path = output_path - num_channels = count_num_channels(mosaics) - base_shape = ref_m.shape[1:3] - assert int(downscale_factor) == downscale_factor - assert downscale_factor < min(base_shape) - pyramid_setting = PyramidSetting( - downscale_factor=int(downscale_factor), - tile_size=max(ref_m.chunksize) - ) - num_levels = pyramid_setting.num_levels(base_shape) - tile_shapes = pyramid_setting.tile_shapes(base_shape) - shapes = pyramid_setting.pyramid_shapes(base_shape) - - if tile_size is not None: - assert tile_size % 16 == 0, ( - f"tile_size must be None or multiples of 16, not {tile_size}" - ) - tile_shapes = [(tile_size, tile_size)] * num_levels - print(tile_shapes) - dtype = ref_m.dtype - - software = f'backsub {_version}' - metadata = { - 'Creator': software, - 'Pixels': { - 'PhysicalSizeX': pixel_size, 'PhysicalSizeXUnit': '\u00b5m', - 'PhysicalSizeY': pixel_size, 'PhysicalSizeYUnit': '\u00b5m' - }, - } - - if channel_names is not None: - num_channels_each_mosaic = [ - count_num_channels([m]) - for m in mosaics - ] - names = format_channel_names(num_channels_each_mosaic, channel_names) - if len(names) == num_channels: - metadata.update({ - 'Channel': {'Name': names}, - }) - - logger.info(f"Writing to {path}") - with tifffile.TiffWriter(path, bigtiff=True) as tif: - kwargs = dict( - metadata=metadata, - software=software, - compression=compression - ) - if kwargs_tifffile is None: - kwargs_tifffile = {} - tif.write( - data=tile_from_combined_mosaics( - mosaics, tile_shape=tile_shapes[0], save_RAM=save_RAM - ), - shape=(num_channels, *shapes[0]), - subifds=int(num_levels - 1), - dtype=dtype, - tile=tile_shapes[0], - **{**kwargs, **kwargs_tifffile} - ) - logger.info('Generating pyramid') - for level, (shape, tile_shape) in enumerate( - zip(shapes[1:], tile_shapes[1:]) - ): - if verbose: - logger.info(f" Level {level+1} ({shape[0]} x {shape[1]})") - tif.write( - data=tile_from_pyramid( - path, - num_channels, - tile_shape=tile_shape, - downscale_factor=downscale_factor, - level=level, - is_mask=is_mask, - save_RAM=save_RAM - ), - shape=(num_channels, *shape), - subfiletype=1, - dtype=dtype, - tile=tile_shape, - **{ - **dict(compression=compression), - **kwargs_tifffile - } - ) - - -def count_num_channels(imgs): - for img in imgs: - assert img.ndim == 2 or img.ndim == 3 - return sum([ - 1 if img.ndim == 2 else img.shape[0] - for img in imgs - ]) - - -def tile_from_combined_mosaics(mosaics, tile_shape, save_RAM=False): - num_rows, num_cols = mosaics[0].shape[1:3] - h, w = tile_shape - n = len(mosaics) - for idx, m in enumerate(mosaics): - for cidx, c in enumerate(m): - # the performance is heavily degraded without pre-computing the - # mosaic channel - with tqdm.dask.TqdmCallback( - ascii=True, - desc=( - f"Assembling mosaic {idx+1:2}/{n:2} (channel" - f" {cidx+1:2}/{m.shape[0]:2})" - ), - ): - c = da_to_zarr(c) if save_RAM else c.compute() - for y in range(0, num_rows, h): - for x in range(0, num_cols, w): - yield np.array(c[y:y+h, x:x+w]) - # yield m[y:y+h, x:x+w].copy().compute() - c = None - -def tile_from_pyramid( - path, - num_channels, - tile_shape, - downscale_factor=2, - level=0, - is_mask=False, - save_RAM=False -): - # workaround progress bar - # https://forum.image.sc/t/tifffile-ome-tiff-generation-is-taking-too-much-ram/41865/26 - pbar = tqdm.tqdm(total=num_channels, ascii=True, desc=f'Processing channel') - for c in range(num_channels): - img = da.from_zarr(zarr.open(tifffile.imread( - path, series=0, level=level, aszarr=True - ))) - if img.ndim == 2: - img = img.reshape(1, *img.shape) - img = img[c] - # read using key seems to generate a RAM spike - # img = tifffile.imread(path, series=0, level=level, key=c) - if not is_mask: - img = img.map_blocks( - cv2.blur, - ksize=(downscale_factor, downscale_factor), anchor=(0, 0) - ) - img = da_to_zarr(img) if save_RAM else img.compute() - num_rows, num_columns = img.shape - h, w = tile_shape - h *= downscale_factor - w *= downscale_factor - last_c = range(num_channels)[-1] - last_y = range(0, num_rows, h)[-1] - last_x = range(0, num_columns, w)[-1] - for y in range(0, num_rows, h): - for x in range(0, num_columns, w): - if (y == last_y) & (x == last_x): - pbar.update(1) - if c == last_c: - pbar.close() - yield np.array(img[y:y+h:downscale_factor, x:x+w:downscale_factor]) - # setting img to None seems necessary to prevent RAM spike - img = None - - -def da_to_zarr(da_img, zarr_store=None, num_workers=None, out_shape=None, chunks=None): - if zarr_store is None: - if out_shape is None: - out_shape = da_img.shape - if chunks is None: - chunks = da_img.chunksize - zarr_store = zarr.create( - out_shape, - chunks=chunks, - dtype=da_img.dtype, - overwrite=True - ) - da_img.to_zarr(zarr_store, compute=False).compute( - num_workers=num_workers - ) - return zarr_store - - -def process_markers(markers): - markers['ind'] = range(0, len(markers)) - if 'remove' not in markers: - markers['remove'] = ["False" for i in range(len(markers))] - else: - markers['remove'] = markers['remove'] == True - - markers['keep'] = markers['remove'] == False - - markers = markers.drop(columns=['remove']) - - return markers - -def detect_pixel_size(img_path,pixel_size=None): - if pixel_size is None: - print('Pixel size overwrite not specified') - try: - metadata = ome_types.from_tiff(img_path) - pixel_size = metadata.images[0].pixels.physical_size_x - except Exception as err: - print(err) - print('Pixel size detection using ome-types failed') - pixel_size = None - return pixel_size - -def scale_background(background_channel, scalar): - background_channel = np.rint(ne.evaluate("background_channel * scalar")) - return np.where(background_channel>65535,65535,background_channel.astype(np.uint16)) - -def subtract(channel_to_process, background): - return np.where(channel_to_process 1: + print( + f"""Warning: Background channel with name {markers.background[channel]} + appears several times in the column "marker_name". + Only the first occurrence will be used for the subtraction.""" + ) + bg_idx = bg_idx[0] + scaling_factor[channel] = ( + markers.exposure[channel] / markers.exposure[bg_idx] + ) + background_idx[channel] = bg_idx + + markers.insert(markers.shape[1], "factor", scaling_factor) + markers.insert(markers.shape[1], "bg_idx", background_idx) + + return markers + + +def extract_img_props(img_path, downscale_factor, tile_size, pixel_size=None): + + # Extract data_type, pyramidal specs, height, width + with tifff.TiffFile(img_path) as tif: + pyr_levels = len(tif.series[0].levels) + is_pyramid = pyr_levels > 1 + data_type = tif.series[0].dtype.name + height, width = tif.series[0].shape[-2::] + dask_chunksize = da.from_array(tif.pages[0].asarray(), chunks="auto").chunksize + if is_pyramid: + # dimensions of the reduced resolution layers(subresolution_dimensions) + subres_dims = [ + tif.series[0].levels[lvl].pages[0].shape for lvl in range(1, pyr_levels) + ] + else: + subres_dims = None + + if downscale_factor <= 1: + raise ValueError("downscale_factor must be greater than 1") + max_dim = max(height, width) + extracted_levels = int( + np.floor(np.log(max_dim / tile_size) / np.log(downscale_factor)) + 1 + ) + + # Try to extract pixel size from ome-xml + if pixel_size is None: + print("Pixel size not specified in the arguments (-mpp)") + try: + metadata = ome_types.from_tiff(img_path) + pixel_size = metadata.images[0].pixels.physical_size_x + pixel_size_unit = metadata.images[0].pixels.physical_size_x_unit + except Exception as err: + print(err) + print("Pixel size or pixel size unit detection using ome-types failed") + pixel_size = 1 + pixel_size_unit = "pixel" + else: + pixel_size = pixel_size + pixel_size_unit = "µm" + + img_props = { + "pixel_size": pixel_size, + "pixel_size_unit": pixel_size_unit, + "data_type": data_type, + "pyramid": is_pyramid, + "levels": pyr_levels, + "extracted_levels": max(extracted_levels, 1), + "sub_levels_dims": subres_dims, + "size_x": width, + "size_y": height, + "chunksize": dask_chunksize, + } + + return img_props + + +def subtract_channels( + src_img_path, + signal_index, + background_index, + factor, + ref_chunksize, + ref_dtype, + task_no, +): + factor = np.float32(factor) # limiting precision to float32 saves memory + signal_as_zarr = zarr.open( + tifff.imread( + src_img_path, aszarr=True, series=0, level=0, key=int(signal_index) + ) + ) + background_as_zarr = zarr.open( + tifff.imread( + src_img_path, aszarr=True, series=0, level=0, key=int(background_index) + ) + ) + signal = da.from_zarr(signal_as_zarr, chunks=ref_chunksize) + background = da.from_zarr(background_as_zarr, chunks=ref_chunksize) + if np.issubdtype(ref_dtype, np.integer): + info = np.iinfo(ref_dtype) + subtraction = da.clip(signal - factor * background, info.min, info.max).astype( + ref_dtype + ) + else: # float32/64, no upper clipping + subtraction = da.clip(signal - factor * background, 0, None).astype(ref_dtype) + with ResourceProfiler(dt=0.25) as resources: + with ProgressBar(): + result = subtraction.compute() + print(f"Resources used by dask during subtraction {task_no}:") + print(resources.results[0], "([sec],[MB],[% CPU usage])") + return result + + +def extract_sublevels_from_tiff(path, ch, levels): + with tifff.TiffFile(path) as tif: + for l in range(1, levels): + yield tif.series[0].levels[l].pages[ch].asarray() + + +def write_pyramid( + src_img_path, + tasks_table, + out_file, + levels, + sub_lvls_dims, + src_data_type, + pyramid_tile_shape, + compression, + is_src_pyramid=False, + save_ram=False, +): + + sub_levels = levels - 1 + + total_operations = tasks_table["processed"].values.sum() # Count True values + count = 1 + + if compression == "none": + compression = None + + with tifff.TiffWriter(out_file, bigtiff=True) as tif: + # write first the original resolution image,i.e. first layer + for _, channel in tasks_table.iterrows(): + if channel.processed: + operation_count = f"({count}/{total_operations})" + print( + f"\n {operation_count} Calculating subtraction of background {channel.background} from {channel.marker_name} signal:" + ) + first_layer = subtract_channels( + src_img_path, + channel.ind, + channel.bg_idx, + channel.factor, + (4096, 4096), + src_data_type, + operation_count, + ) + pyramid_action = "calculate" + count += 1 + else: + first_layer = tifff.imread( + src_img_path, series=0, level=0, key=int(channel.ind) + ) + + if save_ram or not is_src_pyramid: + pyramid_action = "calculate" + else: + pyramid_action = "extract" + + tif.write( + first_layer, + subifds=sub_levels, + tile=pyramid_tile_shape, + photometric="minisblack", + compression=compression, + ) + + if pyramid_action == "calculate": + if save_ram: + pyramid = pyramid_save_ram(first_layer, sub_levels) + else: + pyramid = pyramidal_levels(first_layer, sub_levels, sub_lvls_dims) + + elif pyramid_action == "extract": + pyramid = extract_sublevels_from_tiff( + src_img_path, int(channel.ind), levels + ) + + for sub_layer in pyramid: + tif.write( + sub_layer, + subfiletype=1, + tile=pyramid_tile_shape, + photometric="minisblack", + compression=compression, + # lzw works better when saving channel-by-channel + # jpeg2000 works better saving the whole stack at once (not done here) + # zlib is an alternative that's slower when writing, but gives better compression ratio + ) + + return out_file + + +@memocron +def main(): + args = CLI.get_args(__version__) + in_path = args.input + out_path = args.output + + # 0) Validate input_path is not the same as output_path, pixel data is read into RAM lazily, cannot overwrite input file + assert out_path != in_path + + if args.tile_size % 16 != 0: + raise ValueError("tile_size has to be a multiple of 16") + if args.compression not in ["none", "lzw", "deflate", "zlib"]: + raise ValueError( + "Compression has to be one of the following: 'none','lzw','deflate','zlib'" + ) + + tile_shape = (args.tile_size, args.tile_size) + + # 1) Extract image properties + src_props = extract_img_props( + in_path, args.downscale_factor, args.tile_size, args.pixel_size + ) + + # 2) Modify pyramid_levels if required + if src_props["pyramid"]: + levels = src_props["levels"] + else: + levels = src_props["extracted_levels"] + + # 3) Read/Create markers table and update it to include the information of the processing tasks + markers = process_markers(pd.read_csv(args.markers)) + + markers_updated = markers.loc[markers.keep] + + # 4) Write updated markers.csv without appended columns. This file contains the markers information of the final image stack + markers_preview = markers_updated.drop( + columns=["keep", "ind", "processed", "factor", "bg_idx"] + ) + markers_preview["channel_number"] = np.arange(1, len(markers_preview) + 1) + markers_preview.to_csv(args.markerout, index=False) + + logger.info("\nTASKS PREVIEW:\n{}", markers_updated) + tasks = 1 + for _, channel in markers_updated.iterrows(): + if channel.processed: + print( + f"\n(Task_{tasks}): background subtraction, Channel {channel.marker_name} (Background {channel.background})" + ) + tasks += 1 + + # 5) Calculate subtractions and write output file + out_file = args.output + + logger.info(f"\nTASKS PROGRESS") + + print(f"\nCommencing writing of pyramidal ome.tif file into {out_path}\n") + print(f"\nCommencing subtraction tasks\n") + if args.compression == "none": + compression = None + else: + compression = args.compression + pyramid_abs_path = write_pyramid( + in_path, + markers_updated, + out_file, + levels, + src_props["sub_levels_dims"], + src_props["data_type"], + tile_shape, + compression, + is_src_pyramid=src_props["pyramid"], + save_ram=args.save_ram, + ) + + # 6) Write metadata in OME format into the pyramidal file + channel_names = markers_updated["marker_name"].tolist() + ome_xml = ome_writer.create_ome(channel_names, src_props, __version__) + tifff.tiffcomment(pyramid_abs_path, ome_xml.encode("utf-8")) + + logger.info(f"\nSCRIPT FINISHED PROCESSING TASKS ") + print( + f"\nPyramidal image with {levels} levels was successfully written{'' if compression is None else f' with {compression} compression'}." + ) + + +if __name__ == "__main__": + main() diff --git a/backsub/metadata2markers.py b/backsub/metadata2markers.py new file mode 100644 index 0000000..b112ee3 --- /dev/null +++ b/backsub/metadata2markers.py @@ -0,0 +1,213 @@ +import pathlib +from ome_types import from_tiff +import argparse +import pandas as pd +import numpy as np +import warnings + + +# CLI +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-i", + "--input_img", + required=True, + type=pathlib.Path, + help="absolute path of the input image stack (.tif)", + ) + + parser.add_argument( + "-o", + "--output_dir", + required=True, + type=pathlib.Path, + help="absolute path of the directory where the output .csv file will be written", + ) + + parser.add_argument( + "-fn", + "--output_file_name", + required=False, + type=str, + default="markers.csv", + help="name of the csv file", + ) + + parser.add_argument( + "-rr", + "--remove_background_channels", + required=False, + action="store_true", + help='If applied, values in the "remove" column will be set to TRUE for any channel used as background. Use with caution as background channel exclusion may affect downstream analysis and autofluroescence measurements can be useful.', + ) + + parser.add_argument( + "-rf", + "--registration_filter", + required=False, + type=str, + default="DAPI", + help="Name of filter used for registration (default: DAPI). Note that all channels where the FluorescenceChannel attribute is this value in the metadata will be considered as reference channels.", + ) + + parser.add_argument( + "-rd", + "--remove_dapi", + required=False, + action="store_true", + help='If applied, values in the "remove" column will be set to TRUE for any additional DAPI reference except the first. Manual correction of the output markers file can specify other DAPI channels if needed.', + ) + + parser.add_argument( + "-krm", + "--keep_registration_markername", + required=False, + type=str, + default="DAPI", + help='Name of registration marker that should be kept if "remove_dapi" is set to TRUE. Default is "DAPI". If provided value is not present in the marker_name column, the first instance with the registration filter will be kept.', + ) + + args = parser.parse_args() + return args + + +def make_marker_names_unique_list(markers): + """ + Ensures all marker names in a list are unique. + If a marker appears multiple times, appends an index suffix (_1, _2, ...). + + Example: + ["DAPI", "TRITC", "Cy5", "DAPI"] → ["DAPI", "TRITC", "Cy5", "DAPI_1"] + + Args: + markers (list[str]): List of marker names. + + Returns: + list[str]: List with unique marker names. + """ + seen = {} + unique_markers = [] + + for name in markers: + if name in seen: + seen[name] += 1 + unique_name = f"{name}_{seen[name]}" + else: + seen[name] = 0 + unique_name = name + unique_markers.append(unique_name) + + return unique_markers + + +def meta_from_file(src_img_path, registration_filter): + # Fetch metadata object + ome = from_tiff(src_img_path) + # Fetch image attributes from ome + ch_names = make_marker_names_unique_list( + [element.name for element in ome.images[0].pixels.channels] + ) + exp_times = [element.exposure_time for element in ome.images[0].pixels.planes] + + filters = [ + element.attributes["FluorescenceChannel"] + for element in ome.structured_annotations[0].value.any_elements[0].children + ] + if registration_filter not in filters: + raise ValueError( + f"Registration filter '{registration_filter}' not found in image metadata filters: {list(set(filters))}" + ) + + background = [ + None if registration_filter in element else element for element in filters + ] + + aux_dict = { + "channel_number": list(range(1, len(ch_names) + 1)), + # "cycle_number":cycles, + "marker_name": ch_names, + "Filter": filters, + "background": background, + "exposure": exp_times, + } + + df = pd.DataFrame(aux_dict) + return df + + +def assign_background( + df, rmv_ref=False, rmv_back=False, ref_marker="DAPI", registration_filter="DAPI" +): + # Create column ["backsub_process"] indicating which rows will be processed with backsub + filters_ = df.Filter.unique().tolist() + # Strings corresponding to filters/background names are set to False, since the are not processed + backsub_process = df["marker_name"].replace(filters_, value=False, regex=True) + # Marker_name corresponding to signal will be set to True for processing + backsub_process = np.where(backsub_process == False, False, True) + df.insert(df.shape[1], "backsub_process", backsub_process) + + # Assign the latest mention of the autofluorescence channel to the correspondent row in the background column + rename_background = [] + + # List with row indices of background channels to be removed + for idx, row in df.iterrows(): + + if row.backsub_process: + previous_channels = reversed(df.iloc[:idx].marker_name.to_list()) + for element in previous_channels: + # Supposes background name is a subset of the channel/marker name + if row.background in element: + rename_background.append(element) + break + else: + rename_background.append(None) + + df.drop(columns=["backsub_process"], inplace=True) + df.background = rename_background + + if rmv_ref or rmv_back: + remove = len(df) * [""] + df.insert(df.shape[1], "remove", remove) + + if rmv_back: + df.loc[ + df["background"].isnull() & (df["Filter"] != registration_filter), + ["remove"], + ] = "TRUE" + + if rmv_ref: + df.loc[ + df["background"].isnull() & (df["Filter"] == registration_filter), + ["remove"], + ] = "TRUE" + if ref_marker not in df.marker_name.values: + first_ref_marker = df[df.Filter == registration_filter].index[0] + corr_ref_marker = df.loc[first_ref_marker, "marker_name"] + warnings.warn( + f"Reference marker '{ref_marker}' not found. Using first instance of registration filter '{registration_filter}' - Channel '{corr_ref_marker}' instead." + ) + else: + first_ref_marker = df[df.marker_name == ref_marker].index[0] + df.loc[first_ref_marker, "remove"] = "" + + return df + + +def main(): + args = get_args() + img_path = args.input_img + out_dir = args.output_dir + file_name = args.output_file_name + registration_filter = args.registration_filter + global_ref_marker = args.keep_registration_markername + + df = meta_from_file(img_path, ref_marker_name=registration_filter) + df_updated = assign_background( + df, args.remove_background_references, global_ref_marker, registration_filter + ) + df_updated.to_csv(out_dir / file_name, index=False) + + +if __name__ == "__main__": + main() diff --git a/backsub/ome_schema.py b/backsub/ome_schema.py new file mode 100644 index 0000000..a5fca63 --- /dev/null +++ b/backsub/ome_schema.py @@ -0,0 +1,170 @@ +#!/usr/bin/python +import ome_types +from ome_types.model import OME, Image, Pixels, TiffData, Channel, Plane +import platform + + +def INPUTS(frame): + """ + This function creates a dictionary with the metadata of the tiles. + Args: + frame (pd.DataFrame): dataframe containing the metadata of the tiles. + conformed_markers (list): list of tuples with the name of the markers and their corresponding fluorophore. + Returns: + dict: dictionary with the metadata of the tiles. + """ + inputs = frame.to_dict("list") + + return inputs + + +def TIFF_array(no_of_channels, inputs={"offset": 0}): + """ + This function creates a list of TIFFData objects. + Args: + no_of_channels (int): number of channels. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + list: list of TIFFData objects. + """ + TIFF = [ + TiffData(first_c=ch, ifd=n, plane_count=1) + for n, ch in enumerate(range(0, no_of_channels), start=inputs["offset"]) + ] + + return TIFF + + +def PLANE_array(no_of_channels, inputs): + """ + This function creates a list of Plane objects. + Args: + no_of_channels (int): number of channels. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + list: list of Plane objects. + """ + + PLANE = [ + Plane( + the_c=ch, + the_t=0, + the_z=0, + position_x=inputs["position_x"][ch] if "position_x" in inputs.keys() else 0, + position_y=inputs["position_y"][ch] if "position_y" in inputs.keys() else 0, + position_z=0, + position_x_unit=( + inputs["position_x_unit"][ch] + if "position_x_unit" in inputs.keys() + else "pixel" + ), + position_y_unit=( + inputs["position_y_unit"][ch] + if "position_y_unit" in inputs.keys() + else "pixel" + ), + ) + for ch in range(0, no_of_channels) + ] + + return PLANE + + +def CHANN_array(no_of_channels, inputs): + """ + This function creates a list of Channel objects. + Args: + no_of_channels (int): number of channels. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + list: list of Channel objects. + """ + + CHANN = [ + Channel( + id=f"Channel:{str(ch)}", # 'Channel:{y}:{x}:{marker_name}'.format(x=ch,y=100+int( inputs['tile'][ch] ) ,marker_name=inputs['marker'][ch] ) + name=inputs["name"][ch], + samples_per_pixel=1, + ) + for ch in range(0, no_of_channels) + ] + + return CHANN + + +def PIXELS_array(chann_block, plane_block, tiff_block, inputs): + """ + This function creates a Pixels object. + Args: + chann_block (list): list of Channel objects. + plane_block (list): list of Plane objects. + tiff_block (list): list of TIFFData objects. + inputs (dict): dictionary with the metadata of the tiles. + Returns: + Pixels: Pixels object. + """ + PIXELS = Pixels( + id=f"Pixels:{inputs['tile'][0]}", + dimension_order="XYCZT", + size_c=len(chann_block), + size_t=1, + size_x=inputs["size_x"][0], + size_y=inputs["size_y"][0], + size_z=1, + type=inputs["type"][0], # bit_depth + big_endian=False, + channels=chann_block, + interleaved=False, + physical_size_x=inputs["physical_size_x"][0], + physical_size_x_unit=inputs["physical_size_x_unit"][0], + physical_size_y=inputs["physical_size_y"][0], + physical_size_y_unit=inputs["physical_size_y_unit"][0], + physical_size_z=1.0, + planes=plane_block, + significant_bits=inputs["significant_bits"][0], + tiff_data_blocks=tiff_block, + ) + + return PIXELS + + +def IMAGE_array(pixels_block, imageID): + """ + This function creates an Image object. + Args: + pixels_block (Pixels): Pixels object. + imageID (int): identifier of the image. + Returns: + Image: Image object. + """ + + IMAGE = Image(id=f"Image:{imageID}", pixels=pixels_block) + + return IMAGE + + +def OME_metadata(image_block, software): + """ + This function creates an OME object. + Args: + image_block (list): list of Image objects. + Returns: + OME: OME object. + """ + ome = OME() + ome.creator = software + # detailed versions provided in environment yml + # ome.creator = " ".join( + # [ + # software, + # ome_types.__name__, + # ome_types.__version__, + # "/ python version-", + # platform.python_version(), + # ] + # ) + + ome.images = image_block + ome_xml = ome_types.to_xml(ome) + + return ome, ome_xml diff --git a/backsub/ome_writer.py b/backsub/ome_writer.py new file mode 100644 index 0000000..82a81a2 --- /dev/null +++ b/backsub/ome_writer.py @@ -0,0 +1,46 @@ +import backsub.ome_schema as schema +import pandas as pd + + +def create_ome(conformed_markers, info, software_version): + """ + This function creates an OME-XML file from a pandas dataframe containing the metadata of the tiles. + Args: + tile_info (pd.DataFrame): dataframe containing the metadata of the tiles. + conformed_markers (list): list with the name of the markers in the corresponding order of their appearance in the ome.tif file . + Returns: + str: OME-XML file. + """ + software = f"backsub {software_version}" + no_of_channels = len(conformed_markers) + tile_info_dict = { + "tile": no_of_channels * [1], + "name": conformed_markers, + "type": no_of_channels * [info["data_type"]], + "size_x": no_of_channels * [info["size_x"]], + "size_y": no_of_channels * [info["size_y"]], + "physical_size_x": no_of_channels * [info["pixel_size"]], + "physical_size_x_unit": no_of_channels * [info["pixel_size_unit"]], + "physical_size_y": no_of_channels * [info["pixel_size"]], + "physical_size_y_unit": no_of_channels * [info["pixel_size_unit"]], + "significant_bits": no_of_channels * ["16"], + } + tile_info = pd.DataFrame(tile_info_dict) + + grouped_tiles = tile_info.groupby(["tile"]) + + tiles_counter = 0 + image = [] + for tileID, frame in grouped_tiles: + metadata = schema.INPUTS(frame) + tiff = schema.TIFF_array( + no_of_channels, inputs={"offset": no_of_channels * tiles_counter} + ) + plane = schema.PLANE_array(no_of_channels, metadata) + channel = schema.CHANN_array(no_of_channels, metadata) + pixels = schema.PIXELS_array(channel, plane, tiff, metadata) + image.append(schema.IMAGE_array(pixels, tiles_counter)) + tiles_counter += 1 + ome, ome_xml = schema.OME_metadata(image, software) + + return ome_xml diff --git a/environment.yml b/environment.yml index 957a760..64b8199 100644 --- a/environment.yml +++ b/environment.yml @@ -1,18 +1,16 @@ -name: backsub +name: backsub_env channels: - conda-forge - defaults - - anaconda dependencies: - - "python=3.9" - - "openslide=3.4.1" - - "scikit-image=0.19.2" - - "numexpr=2.8.3" - - "tifffile=2022.8.12" - - "scipy=1.9.3" - - "pandas=2.1.1" - - "zarr=2.3.2" - - procps-ng - - pip - - pip: - - palom \ No newline at end of file + - python=3.12.11 + - pandas=2.3.1 + - tifffile=2025.6.11 + - scikit-image=0.25.2 + - dask=2025.7.0 + - ome-types=0.6.0 + - numpy=2.3.2 + - loguru=0.7.3 + - dask-image=2024.5.3 + - zarr=3.1.1 + - pip \ No newline at end of file diff --git a/example/application_example.png b/example/application_example.png new file mode 100644 index 0000000..e0c2947 Binary files /dev/null and b/example/application_example.png differ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1d17be8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,40 @@ +[project] +name = "backsub" +dynamic = ["version"] +description = "Channel subtraction scaled by exposure times" +readme = "README.md" +requires-python = ">=3.9" +license = { file = "LICENSE" } +authors = [ + { name="Krešimir Bestak", email="kbestak@gmail.com" } +] +keywords = ["autofluorescence", "imaging", "immunofluorescence", "subtraction"] +classifiers = [ + "Intended Audience :: Science/Research", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License" +] +dependencies = [ + "pandas", + "tifffile", + "ome_types", + "numpy", + "loguru", + "dask", + "dask-image", + "scikit-image", + "zarr>=3" +] +[project.urls] +Homepage = "https://github.com/schapirolabor/background_subtraction" +Issues = "https://github.com/schapirolabor/background_subtraction/issues" + +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.dynamic] +version = {attr = "backsub._version.__version__"} + +[project.scripts] +backsub = "backsub.background_sub:main" diff --git a/scripts/old/background_sub_v034.py b/scripts/old/background_sub_v034.py deleted file mode 100644 index 69bdd6d..0000000 --- a/scripts/old/background_sub_v034.py +++ /dev/null @@ -1,313 +0,0 @@ -from __future__ import print_function, division -from multiprocessing.spawn import import_main_path -import sys -import copy -import argparse -import numpy as np -import tifffile -import zarr -import skimage.transform -from aicsimageio import aics_image as AI -import pandas as pd -import numexpr as ne -from ome_types import from_tiff, to_xml -from os.path import abspath -from argparse import ArgumentParser as AP -import time -import dask -from memory_profiler import profile -# This API is apparently changing in skimage 1.0 but it's not clear to -# me what the replacement will be, if any. We'll explicitly import -# this so it will break loudly if someone tries this with skimage 1.0. -try: - from skimage.util.dtype import _convert as dtype_convert -except ImportError: - from skimage.util.dtype import convert as dtype_convert - - -# arg parser -def get_args(): - # Script description - description="""Subtracts background - Lunaphore platform""" - - # Add parser - parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) - - # Sections - inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") - inputs.add_argument("-r", "--root", dest="root", action="store", required=True, help="File path to root image file.") - inputs.add_argument("-m", "--markers", dest="markers", action="store", required=True, help="File path to required markers.csv file") - inputs.add_argument("--pixel-size", metavar="SIZE", dest = "pixel_size", type=float, default = None, action = "store",help="pixel size in microns; default is 1.0") - inputs.add_argument("--pyramid", dest="pyramid", required=False, default=True, help="Should output be pyramidal?") - inputs.add_argument("--tile-size", dest="tile_size", required=False, default=1024, help="Tile size for pyramid generation") - inputs.add_argument("--version", action="version", version="v0.3.4") - - outputs = parser.add_argument_group(title="Output", description="Path to output file") - outputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output file") - outputs.add_argument("-mo", "--marker-output", dest="markerout", action="store", required=True, help="Path to output marker file") - - arg = parser.parse_args() - - # Standardize paths - arg.root = abspath(arg.root) - arg.markers = abspath(arg.markers) - arg.output = abspath(arg.output) - arg.markerout = abspath(arg.markerout) - - return arg - -def preduce(coords, img_in, img_out): - print(img_in.dtype) - (iy1, ix1), (iy2, ix2) = coords - (oy1, ox1), (oy2, ox2) = np.array(coords) // 2 - tile = skimage.img_as_float32(img_in[iy1:iy2, ix1:ix2]) - tile = skimage.transform.downscale_local_mean(tile, (2, 2)) - tile = dtype_convert(tile, 'uint16') - #tile = dtype_convert(tile, img_in.dtype) - img_out[oy1:oy2, ox1:ox2] = tile - -def format_shape(shape): - return "%dx%d" % (shape[1], shape[0]) - -def process_markers(markers): - # add column with starting indices (which match the image channel indices) - # this should be removed soon - markers['ind'] = range(0, len(markers)) - - # if the 'remove' column is not specified, all channels are kept. If it is - # present, it is converted to a boolean indicating which channels should be removed - if 'remove' not in markers: - markers['remove'] = ["False" for i in range(len(markers))] - else: - markers['remove'] = markers['remove'] == True - # invert the markers.remove column to indicate which columns to keep - markers['remove'] = markers['remove'] == False - - return markers - -def process_metadata(metadata, markers): - try: - metadata.screens[0].reagents = [metadata.screens[0].reagents[i] for i in markers[markers.remove == True].ind] - except: - pass - try: - metadata.structured_annotations = [metadata.structured_annotations[i] for i in markers[markers.remove == True].ind] - except: - pass - # these two are required - metadata.images[0].pixels.size_c = len(markers[markers.remove == True]) - metadata.images[0].pixels.channels = [metadata.images[0].pixels.channels[i] for i in markers[markers.remove == True].ind] - try: - metadata.images[0].pixels.planes = [metadata.images[0].pixels.planes[i] for i in markers[markers.remove == True].ind] - except: - pass - try: - metadata.images[0].pixels.tiff_data_blocks[0].plane_count = sum(markers.remove == True) - except: - pass - return metadata - - -# NaN values return True for the statement below in this version of Python. Did not use math.isnan() since the values -# are strings if present -def isNaN(x): - return x != x - -def subtract_channel(image, markers, channel, background_marker, output): - scalar = markers[markers.ind == channel].exposure.values / background_marker.exposure.values - - # create temporary dataframe which will store the multiplied background rounded up to nearest integer - # [0] at the end needed to get [x, y] shape, and not have [1, x, y] - back = copy.copy(image[background_marker.ind])[0] - # subtract background from processed channel and if the background intensity for a certain pixel was larger than - # the processed channel, set intensity to 0 (no negative values) - back = np.rint(ne.evaluate("back * scalar")) - back = np.where(back>65535,65535,back.astype(np.uint16)) - # set the pixel value to 0 if the image channel value is lower than the scaled background channel value - # otherwise, subtract. - output[channel] = np.where(image[channel]= 1 - num_channels, h, w = level_full_shapes[level] - tshape = tile_shapes[level] or (h, w) - tiff = tifffile.TiffFile(outpath) - zimg = zarr.open(tiff.aszarr(series=0, level=level-1, squeeze=False)) - for c in range(num_channels): - sys.stdout.write( - f"\r processing channel {c + 1}/{num_channels}" - ) - sys.stdout.flush() - th = tshape[0] * scale - tw = tshape[1] * scale - for y in range(0, zimg.shape[1], th): - for x in range(0, zimg.shape[2], tw): - a = zimg[c, y:y+th, x:x+tw, 0] - a = skimage.transform.downscale_local_mean( - a, (scale, scale) - ) - if np.issubdtype(zimg.dtype, np.integer): - a = np.around(a) - a = a.astype('uint16') - yield a - -@profile -def main(args): - img_raw = AI.AICSImage(args.root) - img = img_raw.get_image_dask_data("CYX") - - markers_raw = pd.read_csv(args.markers) - markers = process_markers(copy.copy(markers_raw)) - - output = dask.array.empty_like(img) - - output = subtract(img, markers, output) - output = remove_back(output, markers) - - markers_raw = markers_raw[markers_raw.remove != True] - markers_raw = markers_raw.drop("remove", axis = 1) - markers_raw.to_csv(args.markerout, index=False) - - # Processing metadata - highly adapted to Lunaphore outputs - # check if metadata is present - try: - print(img_raw.metadata.images[0]) - metadata = img_raw.metadata - metadata = process_metadata(img_raw.metadata, markers) - except: - metadata = None - - if args.pixel_size != None: - # If specified, the input pixel size is used - pixel_size = args.pixel_size - - else: - try: - if img_raw.metadata.images[0].pixels.physical_size_x != None: - pixel_size = img_raw.metadata.images[0].pixels.physical_size_x - else: - pixel_size = 1.0 - except: - # If no pixel size specified anywhere, use default 1.0 - pixel_size = 1.0 - if (args.pyramid == True) and (int(args.tile_size)<=max(output[0].shape)): - - # construct levels - tile_size = int(args.tile_size) - scale = 2 - - print() - dtype = output.dtype - base_shape = output[0].shape - num_channels = output.shape[0] - num_levels = (np.ceil(np.log2(max(base_shape) / tile_size)) + 1).astype(int) - factors = 2 ** np.arange(num_levels) - shapes = (np.ceil(np.array(base_shape) / factors[:,None])).astype(int) - print(base_shape) - print(np.ceil(np.log2(max(base_shape)/tile_size))+1) - - print("Pyramid level sizes: ") - for i, shape in enumerate(shapes): - print(f" level {i+1}: {format_shape(shape)}", end="") - if i == 0: - print("(original size)", end="") - print() - print() - print(shapes) - - level_full_shapes = [] - for shape in shapes: - level_full_shapes.append((num_channels, shape[0], shape[1])) - level_shapes = shapes - tip_level = np.argmax(np.all(level_shapes < tile_size, axis=1)) - tile_shapes = [ - (tile_size, tile_size) if i <= tip_level else None - for i in range(len(level_shapes)) - ] - - # write pyramid - with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: - tiff.write( - data = output, - shape = level_full_shapes[0], - subifds=int(num_levels-1), - dtype=dtype, - resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), - tile=tile_shapes[0] - ) - for level, (shape, tile_shape) in enumerate( - zip(level_full_shapes[1:], tile_shapes[1:]), 1 - ): - tiff.write( - data = subres_tiles(level, level_full_shapes, tile_shapes, args.output, scale), - shape=shape, - subfiletype=1, - dtype=dtype, - tile=tile_shape - ) - else: - # write image - with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: - tiff.write( - data = output, - shape = output.shape, - dtype=output.dtype, - resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), - ) - try: - tifffile.tiffcomment(args.output, to_xml(metadata)) - except: - pass - # note about metadata: the channels, planes etc were adjusted not to include the removed channels, however - # the channel ids have stayed the same as before removal. E.g if channels 1 and 2 are removed, - # the channel ids in the metadata will skip indices 1 and 2 (channel_id:0, channel_id:3, channel_id:4 ...) - print() - - - -if __name__ == '__main__': - # Read in arguments - args = get_args() - - # Run script - st = time.time() - main(args) - rt = time.time() - st - print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/scripts/old/background_subtraction.py b/scripts/old/background_subtraction.py deleted file mode 100644 index c5a9efb..0000000 --- a/scripts/old/background_subtraction.py +++ /dev/null @@ -1,157 +0,0 @@ -from distutils.log import error -import time -import argparse -from argparse import ArgumentParser as AP -from os.path import abspath -import pandas as pd -import tifffile as tf -import numpy as np - -def get_args(): - # Script description - description="""Subtracts background - Lunaphore platform""" - - # Add parser - parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) - - # Sections - inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") - inputs.add_argument("-r", "--raw", dest="raw", action="store", required=True, help="File path to raw image file.") - inputs.add_argument("-m", "--markers", dest="markers", action="store", required=True, help="File path to required markers.csv file") - - outputs = parser.add_argument_group(title="Output", description="Path to output file") - outputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output file") - - arg = parser.parse_args() - - # Standardize paths - arg.raw = abspath(arg.raw) - arg.markers = abspath(arg.markers) - arg.output = abspath(arg.output) - - return arg - - - -def get_info(input): - info = tf.TiffFile(input) - #if (len(info.series)>1): - # sys.exit("Multiple scenes") - # ^^not good because in pyramidal ome-tiffs we get different levels as different scenes/series - return info - - - -def load(input, channel): - # returns numpy array of the specified channel from input - # here so it is easier to change if better way found - return np.array(tf.imread(input, key = channel)) - - - -def write(data, output, channel): - # only first channel does not have to be appended - if (channel == 0): - tf.imwrite(output, data, append=False) - else: - tf.imwrite(output, data, append = True) - # writes the channel(data) to the specified output file - - - - -def main(args): - - print(f"Raw file = {args.raw}") - print(f"Marker file = {args.markers}") - - markers = pd.read_csv(args.markers) - - # could just be extracted from markers.csv file - info = get_info(args.raw) - ch_number = len(info.pages) - assert ch_number == len(markers), "Marker file doesn't match image, check channels" - - # required columns for background subtraction are "stain", "background" and "exposure", everything else is dropped - try: - markers = markers[["stain", "background", "exposure"]] - except: - print("Markers file needs to contain stain, background and exposure columns") - - # subset to the only three important columns - markers = markers[["stain", "background", "exposure"]] - # add column with starting indices (which match the image channel indices) - not necessary? - markers['ind'] = range(0, len(markers)) - # subset background markers - markers_background = markers[markers.background == True][["ind", "stain", "exposure"]] - # subset markers which need background subtracted - markers = markers[markers.background != True][["ind", "stain", "exposure"]] - - # list of stains specified as background - background_stains = [stain for stain in markers_background.stain] - # initializing sublist - sublist will indicate which background channel to subtract - sublist = [-1 for i in range(len(markers))] - for i, stain in enumerate(markers_background.stain): - # it should be possible for the regex to catch multiple channels unless one is contained in the beginning of the other(s), if so, the code breaks - # the i+1 is only there so that each row with a regex match gets shifted by one ot differentiate between unmatched and matched rows - sublist += np.where(markers.stain.str.match(stain)==True, i+1, 0) - # add sublist to markers - if sublist == -1, no background subtraction, if sublist == 0, first stain in background_stains should be subtracted - # if sublist == n, n-1th stain in background stains should be subtracted - markers["sublist"]=sublist - - # create variables containing the background channels! - background_dictionary = {} - for channel in markers_background.ind: - # for each channel containing the background, read in the image as a numpy array - # data is saved in the background_dictionary with the name "background_channel" - background_dictionary["background_"+str(channel)]=np.array(tf.imread(args.raw, key = channel)) - - # main part - for channel in markers.ind: - # if sublist has negative values, no background subtraction because it doesn't match any background channel - if list(markers.sublist[[channel]])[0] < 0: - # load the channel which doesn't have a corresponding background channel and write it to output - write(load(args.raw, channel=channel), args.output, channel=channel) - print("Channel "+str(channel)+" written into tif, no background subtraction") - else: - # extracting just the row from the markers_background dataframe which contains the stain denoted by markers.sublist - temp_back = markers_background[markers_background.stain == background_stains[list(markers.sublist[[channel]])[0]]] - # extracting just the row from markers for the channel being processed - temp_mark = markers[markers.ind==channel] - - # load the channel being processed - temp = load(args.raw, channel=channel) - - # calculate scalar with which the background will be multiplied (Marker_corr = Marker_raw - Background * Exposure_marker / Exposure_background) - scalar = list(temp_mark.exposure)[0] / list(temp_back.exposure)[0] - - # create temporary dataframe which will store the multiplied background rounded up to nearest integer - temp2 = np.rint(np.multiply(background_dictionary["background_"+str(list(temp_back.ind)[0])], scalar)) - - # data has to be converted back to 'uint16' - all outliers will be limited to 2^16 (which happens after multiplication with a scalar) - temp2 = temp2.astype(np.uint16) - temp = temp.astype(np.uint16) - - # to avoid any negative values, if the processed channel has a pixel value lower than the scaled background, they are both set to 0 - temp[temp= 1 - num_channels, h, w = level_full_shapes[level] - tshape = tile_shapes[level] or (h, w) - tiff = tifffile.TiffFile(outpath) - zimg = zarr.open(tiff.aszarr(series=0, level=level-1, squeeze=False)) - for c in range(num_channels): - sys.stdout.write( - f"\r processing channel {c + 1}/{num_channels}" - ) - sys.stdout.flush() - th = tshape[0] * scale - tw = tshape[1] * scale - for y in range(0, zimg.shape[1], th): - for x in range(0, zimg.shape[2], tw): - a = zimg[c, y:y+th, x:x+tw, 0] - a = skimage.transform.downscale_local_mean( - a, (scale, scale) - ) - if np.issubdtype(zimg.dtype, np.integer): - a = np.around(a) - a = a.astype('uint16') - yield a - - -def main(args): - img_raw = AI.AICSImage(args.root) - img = img_raw.get_image_dask_data("CYX") - - markers_raw = pd.read_csv(args.markers) - markers = process_markers(copy.copy(markers_raw)) - - img = subtract(img, markers) - img = remove_back(img, markers) - - markers_raw = markers_raw[markers_raw.background != True] - markers_raw = markers_raw.drop("background", axis = 1) - markers_raw.to_csv(args.markerout, index=False) - - # Processing metadata - highly adapted to Lunaphore outputs - metadata = process_metadata(img_raw.metadata, markers) - metadata = img_raw.metadata - - - if args.pixel_size != None: - # If specified, the inputted pixel size is used - pixel_size = args.pixel_size - elif img_raw.metadata.images[0].pixels.physical_size_x != None: - # If pixel size is not specified, the metadata is checked (the script trusts users more than metadata) - pixel_size = img_raw.metadata.images[0].pixels.physical_size_x - else: - # If no pixel size specified anywhere, use default 1.0 - pixel_size = 1.0 - - # construct levels - tile_size = 1024 - scale = 2 - - print() - dtype = img.dtype - base_shape = img[0].shape - num_channels = img.shape[0] - num_levels = (np.ceil(np.log2(max(base_shape) / tile_size)) + 1).astype(int) - factors = 2 ** np.arange(num_levels) - shapes = (np.ceil(np.array(base_shape) / factors[:,None])).astype(int) - - print("Pyramid level sizes: ") - for i, shape in enumerate(shapes): - print(f" level {i+1}: {format_shape(shape)}", end="") - if i == 0: - print("(original size)", end="") - print() - print() - print(shapes) - - level_full_shapes = [] - for shape in shapes: - level_full_shapes.append((num_channels, shape[0], shape[1])) - level_shapes = shapes - - #level_shapes = np.array(level_shapes) - tip_level = np.argmax(np.all(level_shapes < tile_size, axis=1)) - tile_shapes = [ - (tile_size, tile_size) if i <= tip_level else None - for i in range(len(level_shapes)) - ] - - # write pyramid - - with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: - tiff.write( - data = img, - shape = level_full_shapes[0], - subifds=int(num_levels-1), - dtype=dtype, - resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), - tile=tile_shapes[0] - ) - for level, (shape, tile_shape) in enumerate( - zip(level_full_shapes[1:], tile_shapes[1:]), 1 - ): - tiff.write( - data = subres_tiles(level, level_full_shapes, tile_shapes, args.output, scale), - shape=shape, - subfiletype=1, - dtype=dtype, - tile=tile_shape - ) - if metadata.images[0].pixels.planes: - temp_planes = [] - for i, channel_id in enumerate(range(num_channels)): - temp_plane = metadata.images[0].pixels.planes[channel_id] - temp_plane.the_c = i - temp_planes.append(temp_plane) - metadata.images[0].pixels.planes = temp_planes - if metadata.images[0].pixels.tiff_data_blocks and len( - metadata.images[0].pixels.tiff_data_blocks) > 0: - metadata.images[0].pixels.tiff_data_blocks[0].plane_count = num_channels - # Write - tifffile.tiffcomment(args.output, to_xml(metadata)) - print() - - - -if __name__ == '__main__': - # Read in arguments - args = get_args() - - # Run script - st = time.time() - main(args) - rt = time.time() - st - print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") - - - - - - - - diff --git a/scripts/old/background_subtraction_v033.py b/scripts/old/background_subtraction_v033.py deleted file mode 100644 index 58f6a7f..0000000 --- a/scripts/old/background_subtraction_v033.py +++ /dev/null @@ -1,284 +0,0 @@ -from __future__ import print_function, division -from multiprocessing.spawn import import_main_path -import sys -import copy -import argparse -import numpy as np -import tifffile -import zarr -import skimage.transform -from aicsimageio import aics_image as AI -import pandas as pd -import numexpr as ne -from ome_types import from_tiff, to_xml -from os.path import abspath -from argparse import ArgumentParser as AP -import time -import dask -# This API is apparently changing in skimage 1.0 but it's not clear to -# me what the replacement will be, if any. We'll explicitly import -# this so it will break loudly if someone tries this with skimage 1.0. -try: - from skimage.util.dtype import _convert as dtype_convert -except ImportError: - from skimage.util.dtype import convert as dtype_convert - - -# arg parser -def get_args(): - # Script description - description="""Subtracts background - Lunaphore platform""" - - # Add parser - parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) - - # Sections - inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") - inputs.add_argument("-r", "--root", dest="root", action="store", required=True, help="File path to root image file.") - inputs.add_argument("-m", "--markers", dest="markers", action="store", required=True, help="File path to required markers.csv file") - inputs.add_argument("--pixel-size", metavar="SIZE", dest = "pixel_size", type=float, default = None, action = "store",help="pixel size in microns; default is 1.0") - - outputs = parser.add_argument_group(title="Output", description="Path to output file") - outputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output file") - outputs.add_argument("-mo", "--marker-output", dest="markerout", action="store", required=True, help="Path to output marker file") - - arg = parser.parse_args() - - # Standardize paths - arg.root = abspath(arg.root) - arg.markers = abspath(arg.markers) - arg.output = abspath(arg.output) - arg.markerout = abspath(arg.markerout) - - return arg - -def preduce(coords, img_in, img_out): - print(img_in.dtype) - (iy1, ix1), (iy2, ix2) = coords - (oy1, ox1), (oy2, ox2) = np.array(coords) // 2 - tile = skimage.img_as_float32(img_in[iy1:iy2, ix1:ix2]) - tile = skimage.transform.downscale_local_mean(tile, (2, 2)) - tile = dtype_convert(tile, 'uint16') - #tile = dtype_convert(tile, img_in.dtype) - img_out[oy1:oy2, ox1:ox2] = tile - -def format_shape(shape): - return "%dx%d" % (shape[1], shape[0]) - -def process_markers(markers): - # add column with starting indices (which match the image channel indices) - # this should be removed soon - markers['ind'] = range(0, len(markers)) - - # if the 'remove' column is not specified, all channels are kept. If it is - # present, it is converted to a boolean indicating which channels should be removed - if 'remove' not in markers: - markers['remove'] = ["False" for i in range(len(markers))] - else: - markers['remove'] = markers['remove'] == True - # invert the markers.remove column to indicate which columns to keep - markers['remove'] = markers['remove'] == False - - return markers - -def process_metadata(metadata, markers): - try: - metadata.screens[0].reagents = [metadata.screens[0].reagents[i] for i in markers[markers.remove == True].ind] - except: - pass - try: - metadata.structured_annotations = [metadata.structured_annotations[i] for i in markers[markers.remove == True].ind] - except: - pass - # these two are required - metadata.images[0].pixels.size_c = len(markers[markers.remove == True]) - metadata.images[0].pixels.channels = [metadata.images[0].pixels.channels[i] for i in markers[markers.remove == True].ind] - try: - metadata.images[0].pixels.planes = [metadata.images[0].pixels.planes[i] for i in markers[markers.remove == True].ind] - except: - pass - try: - metadata.images[0].pixels.tiff_data_blocks[0].plane_count = sum(markers.remove == True) - except: - pass - return metadata - -# NaN values return True for the statement below in this version of Python. Did not use math.isnan() since the values -# are strings if present -def isNaN(x): - return x != x - -def subtract_channel(image, markers, channel, background_marker, output): - scalar = markers[markers.ind == channel].exposure.values / background_marker.exposure.values - - # create temporary dataframe which will store the multiplied background rounded up to nearest integer - # [0] at the end needed to get [x, y] shape, and not have [1, x, y] - back = copy.copy(image[background_marker.ind])[0] - # subtract background from processed channel and if the background intensity for a certain pixel was larger than - # the processed channel, set intensity to 0 (no negative values) - back = np.rint(ne.evaluate("back * scalar")) - back = np.where(back>65535,65535,back.astype(np.uint16)) - # set the pixel value to 0 if the image channel value is lower than the scaled background channel value - # otherwise, subtract. - output[channel] = np.where(image[channel]= 1 - num_channels, h, w = level_full_shapes[level] - tshape = tile_shapes[level] or (h, w) - tiff = tifffile.TiffFile(outpath) - zimg = zarr.open(tiff.aszarr(series=0, level=level-1, squeeze=False)) - for c in range(num_channels): - sys.stdout.write( - f"\r processing channel {c + 1}/{num_channels}" - ) - sys.stdout.flush() - th = tshape[0] * scale - tw = tshape[1] * scale - for y in range(0, zimg.shape[1], th): - for x in range(0, zimg.shape[2], tw): - a = zimg[c, y:y+th, x:x+tw, 0] - a = skimage.transform.downscale_local_mean( - a, (scale, scale) - ) - if np.issubdtype(zimg.dtype, np.integer): - a = np.around(a) - a = a.astype('uint16') - yield a - -def main(args): - img_raw = AI.AICSImage(args.root) - img = img_raw.get_image_dask_data("CYX") - - markers_raw = pd.read_csv(args.markers) - markers = process_markers(copy.copy(markers_raw)) - - output = dask.array.empty_like(img) - - output = subtract(img, markers, output) - output = remove_back(output, markers) - - markers_raw = markers_raw[markers_raw.remove != True] - markers_raw = markers_raw.drop("remove", axis = 1) - markers_raw.to_csv(args.markerout, index=False) - - # Processing metadata - highly adapted to Lunaphore outputs - metadata = img_raw.metadata - metadata = process_metadata(img_raw.metadata, markers) - - if args.pixel_size != None: - # If specified, the input pixel size is used - pixel_size = args.pixel_size - elif img_raw.metadata.images[0].pixels.physical_size_x != None: - # If pixel size is not specified, the metadata is checked (the script trusts users more than metadata) - pixel_size = img_raw.metadata.images[0].pixels.physical_size_x - else: - # If no pixel size specified anywhere, use default 1.0 - pixel_size = 1.0 - - # construct levels - tile_size = 1024 - scale = 2 - - print() - dtype = output.dtype - base_shape = output[0].shape - num_channels = output.shape[0] - num_levels = (np.ceil(np.log2(max(base_shape) / tile_size)) + 1).astype(int) - factors = 2 ** np.arange(num_levels) - shapes = (np.ceil(np.array(base_shape) / factors[:,None])).astype(int) - - print("Pyramid level sizes: ") - for i, shape in enumerate(shapes): - print(f" level {i+1}: {format_shape(shape)}", end="") - if i == 0: - print("(original size)", end="") - print() - print() - print(shapes) - - level_full_shapes = [] - for shape in shapes: - level_full_shapes.append((num_channels, shape[0], shape[1])) - level_shapes = shapes - tip_level = np.argmax(np.all(level_shapes < tile_size, axis=1)) - tile_shapes = [ - (tile_size, tile_size) if i <= tip_level else None - for i in range(len(level_shapes)) - ] - - # write pyramid - with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: - tiff.write( - data = output, - shape = level_full_shapes[0], - subifds=int(num_levels-1), - dtype=dtype, - resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), - tile=tile_shapes[0] - ) - for level, (shape, tile_shape) in enumerate( - zip(level_full_shapes[1:], tile_shapes[1:]), 1 - ): - tiff.write( - data = subres_tiles(level, level_full_shapes, tile_shapes, args.output, scale), - shape=shape, - subfiletype=1, - dtype=dtype, - tile=tile_shape - ) - - # note about metadata: the channels, planes etc were adjusted not to include the removed channels, however - # the channel ids have stayed the same as before removal. E.g if channels 1 and 2 are removed, - # the channel ids in the metadata will skip indices 1 and 2 (channel_id:0, channel_id:3, channel_id:4 ...) - tifffile.tiffcomment(args.output, to_xml(metadata)) - print() - - - -if __name__ == '__main__': - # Read in arguments - args = get_args() - - # Run script - st = time.time() - main(args) - rt = time.time() - st - print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") diff --git a/scripts/old/background_subtraction_v2.py b/scripts/old/background_subtraction_v2.py deleted file mode 100644 index 05c75df..0000000 --- a/scripts/old/background_subtraction_v2.py +++ /dev/null @@ -1,134 +0,0 @@ -from distutils.log import error -import time -import argparse -from argparse import ArgumentParser as AP -from os.path import abspath -import pandas as pd -import tifffile as tf -import numpy as np -import numexpr as ne -from aicsimageio import aics_image as AI -from aicsimageio import writers as AIW -from itertools import compress -import ome_types - - - -def get_args(): - # Script description - description="""Subtracts background - Lunaphore platform""" - - # Add parser - parser = AP(description=description, formatter_class=argparse.RawDescriptionHelpFormatter) - - # Sections - inputs = parser.add_argument_group(title="Required Input", description="Path to required input file") - inputs.add_argument("-r", "--root", dest="root", action="store", required=True, help="File path to root image file.") - inputs.add_argument("-m", "--markers", dest="markers", action="store", required=True, help="File path to required markers.csv file") - - outputs = parser.add_argument_group(title="Output", description="Path to output file") - outputs.add_argument("-o", "--output", dest="output", action="store", required=True, help="Path to output file") - outputs.add_argument("-mo", "--marker-output", dest="markerout", action="store", required=True, help="Path to output marker file") - - arg = parser.parse_args() - - # Standardize paths - arg.root = abspath(arg.root) - arg.markers = abspath(arg.markers) - arg.output = abspath(arg.output) - arg.markerout = abspath(arg.markerout) - - return arg - -# add function to WRITE PYRAMID - -def main(args): - - print(f"Root file = {args.root}") - print(f"Marker file = {args.markers}") - - # read in markers.csv file - markers_raw = pd.read_csv(args.markers) - - # required columns for background subtraction are "Filter", "background" and "exposure", everything else is dropped ^^^^ - try: - markers = markers_raw[["Filter", "background", "exposure"]] - except: - raise error("Markers file needs to contain Filter, background and exposure columns") - - # load image as dask array (unloaded unless directly called) - img_raw = AI.AICSImage(args.root) - img = img_raw.get_image_dask_data("CYX") # hardcoding shape, channel is first - assert img.shape[0] == len(markers), "Marker file doesn't match image, check channels" - - # add column with starting indices (which match the image channel indices) - markers['ind'] = range(0, len(markers)) - - # initializing sublist - sublist will indicate which background channel to subtract - sublist = [-1 for i in range(len(markers))] - for i, Filter in enumerate(markers[markers.background == True].Filter): - # in the markers.csv file, the Filter needs to be the exact same!!! - # sublist needs to be able to handle any background index - # - the index of the background column is put to sublist where - # the Filter matches the background - sublist += np.where( - np.logical_and(markers.Filter == Filter, np.array(markers.background != True)), - markers[markers.background == True].iloc[i].ind + 1, 0) - # add sublist to markers - if sublist == -1, no background subtraction - # if other, the value matches the index of the background which needs to be subtracted - markers["sublist"] = sublist - - # iterating over channels - for channel in markers.ind: - # if sublist has negative values, no background subtraction (either because it does - # not match background, or because it is background) - if markers.sublist[channel] < 0: - # no change - applies to background channels as well - print(f"Channel {channel} processed, no background subtraction") - - else: - # scalar is the coefficient with which the background pixel intensity is scaled - # it is the result of dividing the marker exposure time with the background exposure time - # Marker_corr = Marker_root - Background * Exposure_marker / Exposure_background - scalar = markers[markers.ind==channel].exposure.values / markers[markers.ind == markers.sublist[channel]].exposure.values - - # create temporary dataframe which will store the multiplied background rounded up to nearest integer - back = img[markers.sublist[channel]] - back = np.rint(ne.evaluate("back * scalar")).astype(np.uint16) # slowest step of entire script - - # subtract background from processed channel and if the background intensity for a certain pixel was larger than - # the processed channel, set intensity to 0 (no negative values) - img[channel] = np.where(img[channel]= 1 - num_channels, h, w = level_full_shapes[level] - tshape = tile_shapes[level] or (h, w) - tiff = tifffile.TiffFile(outpath) - zimg = zarr.open(tiff.aszarr(series=0, level=level-1, squeeze=False)) - for c in range(num_channels): - sys.stdout.write( - f"\r processing channel {c + 1}/{num_channels}" - ) - sys.stdout.flush() - th = tshape[0] * scale - tw = tshape[1] * scale - for y in range(0, zimg.shape[1], th): - for x in range(0, zimg.shape[2], tw): - a = zimg[c, y:y+th, x:x+tw, 0] - a = skimage.transform.downscale_local_mean( - a, (scale, scale) - ) - if np.issubdtype(zimg.dtype, np.integer): - a = np.around(a) - a = a.astype('uint16') - yield a - - -def main(args): - img_raw = AI.AICSImage(args.root) - img = img_raw.get_image_dask_data("CYX") - - markers_raw = pd.read_csv(args.markers) - markers = process_markers(markers_raw) - - img = subtract(img, markers) - img = remove_back(img, markers) - - markers_raw = markers_raw[markers_raw.background != True] - markers_raw.drop("background", axis = 1, inplace = True) - markers_raw.to_csv(args.markerout) - - # Processing metadata - highly adapted to Lunaphore outputs - metadata = process_metadata(img_raw.metadata, markers) - metadata = img_raw.metadata - - pixel_size = args.pixel_size - - # construct levels - tile_size = 1024 - scale = 2 - - #if hasattr(os, 'sched_getaffinity'): - # num_workers = len(os.sched_getaffinity(0)) - #else: - # num_workers = multiprocessing.cpu_count() - #print(f"Using {num_workers} worker threads on detected CPU count.") - print() - dtype = img.dtype - base_shape = img[0].shape - num_channels = img.shape[0] - num_levels = (np.ceil(np.log2(max(base_shape) / tile_size)) + 1).astype(int) - factors = 2 ** np.arange(num_levels) - shapes = (np.ceil(np.array(base_shape) / factors[:,None])).astype(int) - - print("Pyramid level sizes: ") - for i, shape in enumerate(shapes): - print(f" level {i+1}: {format_shape(shape)}", end="") - if i == 0: - print("(original size)", end="") - print() - print() - - print(shapes) - #executor = concurrent.futures.ThreadPoolExecutor(num_workers) - - #shape_pairs = zip(shapes, shapes[1:]) - - - - - level_full_shapes = [] - for shape in shapes: - level_full_shapes.append((num_channels, shape[0], shape[1])) - level_shapes = shapes - - #level_shapes.append(shape) - #for level in range(num_levels): - # level_full_shapes.append(level_dict[f"level_{level}"].shape) - # level_shapes.append(level_dict[f"level_{level}"].shape[1:]) - - #level_shapes = np.array(level_shapes) - tip_level = np.argmax(np.all(level_shapes < tile_size, axis=1)) - tile_shapes = [ - (tile_size, tile_size) if i <= tip_level else None - for i in range(len(level_shapes)) - ] - - # write pyramid - - with tifffile.TiffWriter(args.output, ome=True, bigtiff=True) as tiff: - tiff.write( - data = img, - shape = level_full_shapes[0], - subifds=int(num_levels-1), - dtype=dtype, - resolution=(10000 / pixel_size, 10000 / pixel_size, "centimeter"), - tile=tile_shapes[0] - ) - for level, (shape, tile_shape) in enumerate( - zip(level_full_shapes[1:], tile_shapes[1:]), 1 - ): - tiff.write( - data = subres_tiles(level, level_full_shapes, tile_shapes, args.output, scale), - #data=level_dict[f"level_{level}"], - shape=shape, - subfiletype=1, - dtype=dtype, - tile=tile_shape - ) - #metadata.images[0].pixels.channels = [metadata.images[0].pixels.channels[i] for i in range(num_channels)] - #metadata.images[0].pixels.size_c = num_channels - #metadata.images[0].pixels.size_x = img.shape[2] - #metadata.images[0].pixels.size_y = img.shape[1] - if metadata.images[0].pixels.planes: - temp_planes = [] - for i, channel_id in enumerate(range(num_channels)): - temp_plane = metadata.images[0].pixels.planes[channel_id] - temp_plane.the_c = i - temp_planes.append(temp_plane) - metadata.images[0].pixels.planes = temp_planes - if metadata.images[0].pixels.tiff_data_blocks and len( - metadata.images[0].pixels.tiff_data_blocks) > 0: - metadata.images[0].pixels.tiff_data_blocks[0].plane_count = num_channels - # Write - tifffile.tiffcomment(args.output, to_xml(metadata)) - print() - - -if __name__ == '__main__': - # Read in arguments - args = get_args() - - # Run script - st = time.time() - main(args) - rt = time.time() - st - print(f"Script finished in {rt // 60:.0f}m {rt % 60:.0f}s") - - - - - - - -