From a714aea4c77a44f7f501f4c271938e80a1c18250 Mon Sep 17 00:00:00 2001
From: Najib Ishaq <najib_ishaq@zoho.com>
Date: Mon, 6 May 2024 07:23:31 -0400
Subject: [PATCH 01/18] feat: added tool

---
 .../rt-cetsa-moltprot-tool/.bumpversion.cfg   |   29 +
 regression/rt-cetsa-moltprot-tool/Dockerfile  |   20 +
 regression/rt-cetsa-moltprot-tool/README.md   |   24 +
 regression/rt-cetsa-moltprot-tool/VERSION     |    1 +
 regression/rt-cetsa-moltprot-tool/plugin.json |   52 +
 .../rt-cetsa-moltprot-tool/pyproject.toml     |   76 +
 .../rt-cetsa-moltprot-tool/run-plugin.sh      |   20 +
 .../regression/rt_cetsa_moltprot/VERSION      |    1 +
 .../regression/rt_cetsa_moltprot/__init__.py  |   48 +
 .../regression/rt_cetsa_moltprot/__main__.py  |   76 +
 .../regression/rt_cetsa_moltprot/core.py      | 3780 +++++++++++++++++
 .../regression/rt_cetsa_moltprot/models.py    |  658 +++
 12 files changed, 4785 insertions(+)
 create mode 100644 regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
 create mode 100755 regression/rt-cetsa-moltprot-tool/Dockerfile
 create mode 100644 regression/rt-cetsa-moltprot-tool/README.md
 create mode 100644 regression/rt-cetsa-moltprot-tool/VERSION
 create mode 100644 regression/rt-cetsa-moltprot-tool/plugin.json
 create mode 100644 regression/rt-cetsa-moltprot-tool/pyproject.toml
 create mode 100644 regression/rt-cetsa-moltprot-tool/run-plugin.sh
 create mode 100644 regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/VERSION
 create mode 100644 regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
 create mode 100644 regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
 create mode 100644 regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
 create mode 100644 regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/models.py
diff --git a/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg b/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
new file mode 100644
index 0000000..d858009
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
@@ -0,0 +1,29 @@
+[bumpversion]
+current_version = 0.1.0
+commit = True
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
+serialize = 
+	{major}.{minor}.{patch}-{release}{dev}
+	{major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = _
+first_value = dev
+values = 
+	dev
+	_
+
+[bumpversion:part:dev]
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
+
+[bumpversion:file:plugin.json]
+
+[bumpversion:file:VERSION]
+
+[bumpversion:file:README.md]
+
+[bumpversion:file:src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py]
diff --git a/regression/rt-cetsa-moltprot-tool/Dockerfile b/regression/rt-cetsa-moltprot-tool/Dockerfile
new file mode 100755
index 0000000..dd390da
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/Dockerfile
@@ -0,0 +1,20 @@
+FROM polusai/bfio:2.3.6
+
+# environment variables defined in polusai/bfio; the tool writes its tables with DataFrame.to_csv, so the tabular extension is .csv
+ENV EXEC_DIR="/opt/executables"
+ENV POLUS_IMG_EXT=".ome.tif"
+ENV POLUS_TAB_EXT=".csv"
+ENV POLUS_LOG="INFO"
+
+# Work directory defined in the base container
+WORKDIR ${EXEC_DIR}
+
+COPY pyproject.toml ${EXEC_DIR}
+COPY VERSION ${EXEC_DIR}
+COPY README.md ${EXEC_DIR}
+COPY src ${EXEC_DIR}/src
+
+RUN pip3 install ${EXEC_DIR} --no-cache-dir
+
+ENTRYPOINT ["python3", "-m", "polus.tabular.regression.rt_cetsa_moltprot"]
+CMD ["--help"]
diff --git a/regression/rt-cetsa-moltprot-tool/README.md b/regression/rt-cetsa-moltprot-tool/README.md
new file mode 100644
index 0000000..24a28c6
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/README.md
@@ -0,0 +1,24 @@
+# RT_CETSA Moltprot Regression (v0.1.0)
+
+This WIPP plugin runs regression analysis for the RT-CETSA pipeline.
+The input csv file should be sorted by `Temperature` column.
+
+## Building
+
+To build the Docker image for this plugin, run
+`./build-docker.sh`.
+
+## Install WIPP Plugin
+
+If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit.
+
+## Options
+
+This plugin takes two input arguments and two output arguments:
+
+| Name               | Description                                                | I/O    | Type          |
+|--------------------|------------------------------------------------------------|--------|---------------|
+| `--inpDir`         | Input data collection to be processed by this plugin       | Input  | genericData   |
+| `--filePattern`    | Pattern to parse tabular files                             | Input  | string        |
+| `--outDir`         | Output file                                                | Output | genericData   |
+| `--preview`        | Generate JSON file with outputs                            | Output | JSON          |
diff --git a/regression/rt-cetsa-moltprot-tool/VERSION b/regression/rt-cetsa-moltprot-tool/VERSION
new file mode 100644
index 0000000..6e8bf73
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/VERSION
@@ -0,0 +1 @@
+0.1.0
diff --git a/regression/rt-cetsa-moltprot-tool/plugin.json b/regression/rt-cetsa-moltprot-tool/plugin.json
new file mode 100644
index 0000000..e48b1d0
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/plugin.json
@@ -0,0 +1,52 @@
+{
+  "name": "RT-CETSA Moltprot",
+  "version": "0.1.0",
+  "title": "RT-CETSA Moltprot",
+  "description": "Run regression analysis for the RT-CETSA pipeline.",
+  "author": "Nicholas Schaub (nick.schaub@nih.gov), Najib Ishaq (najib.ishaq@nih.gov)",
+  "institution": "National Center for Advancing Translational Sciences, National Institutes of Health",
+  "repository": "https://github.com/PolusAI/tabular-tools",
+  "website": "https://ncats.nih.gov/preclinical/core/informatics",
+  "citation": "",
+  "containerId": "polusai/rt-cetsa-moltprot-tool:0.1.0",
+  "baseCommand": [
+    "python3",
+    "-m",
+    "polus.tabular.regression.rt_cetsa_moltprot"
+  ],
+  "inputs": [
+    {
+      "name": "inpDir",
+      "type": "genericData",
+      "description": "Input data collection to be processed by this plugin",
+      "required": true
+    },
+    {
+      "name": "filePattern",
+      "type": "string",
+      "description": "Pattern to parse input files",
+      "default": ".+",
+      "required": false
+    }
+  ],
+  "outputs": [
+    {
+      "name": "outDir",
+      "type": "genericData",
+      "description": "Output data collection"
+    }
+  ],
+  "ui": [
+    {
+      "key": "inputs.inpDir",
+      "title": "Input collection",
+      "description": "Input data collection to be processed by this plugin"
+    },
+    {
+      "key": "inputs.filePattern",
+      "title": "filePattern",
+      "description": "Pattern to parse input files",
+      "default": ".+"
+    }
+  ]
+}
diff --git a/regression/rt-cetsa-moltprot-tool/pyproject.toml b/regression/rt-cetsa-moltprot-tool/pyproject.toml
new file mode 100644
index 0000000..8627e09
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/pyproject.toml
@@ -0,0 +1,76 @@
+[tool.poetry]
+name = "polus_tabular_regression_rt_cetsa_moltprot"
+version = "0.1.0"
+description = "Run regression analysis for the RT-CETSA pipeline."
+authors = [
+    "Nick Schaub <nick.schaub@nih.gov>",
+    "Najib Ishaq <najib.ishaq@nih.gov>",
+]
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.12"
+typer = "^0.7.0"
+filepattern = "^2.0.5"
+pandas = "^2.2.2"
+matplotlib = "^3.8.4"
+scipy = "^1.13.0"
+
+[tool.poetry.group.dev.dependencies]
+bump2version = "^1.0.1"
+pre-commit = "^3.1.0"
+pytest = "^7.2.1"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.ruff]
+extend = "../../ruff.toml"
+extend-ignore = [
+    "RET505",  # Unnecessary `else` after `return` statement
+    "E501",  # Line too long
+    "ANN001",  # Missing type annotation
+    "D102",  # Missing docstring in public method
+    "ANN201",  # Missing return type annotation
+    "N806",  # Variable in function should be lowercase
+    "D205",  # 1 blank line required between summary line and description
+    "N803",  # Argument name should be lowercase
+    "PLR0913",  # Too many arguments
+    "D415",  # First line should end with a period, question mark, or exclamation point
+    "PLR2004",  # Magic value used in comparison
+    "B006",  # Do not use mutable default arguments
+    "D107",  # Missing docstring
+    "D101",  # Missing docstring
+    "E731",  # Do not assign a lambda expression, use a def
+    "E402",  # Module level import not at top of file
+    "PTH123",  # `open()` should be replaced with `Path.open()`
+    "PTH118",  # `os.path.join()` should be replaced with `/` operator
+    "PTH100",  # `os.path.abspath()` should be replaced with `Path.resolve()`
+    "PLR0915",  # Too many statements
+    "PLR0912",  # Too many branches
+    "C901",  # Function is too complex
+    "T201",  # `print` used
+    "E722",  # Do not use bare 'except'
+    "B904",  # Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
+    "ANN202",  # Missing return type annotation for private function
+    "ARG002",  # Unused method argument
+    "N802",  # Function name should be lowercase
+    "PTH103",  # `os.makedirs()` should be replaced with `Path.mkdir(parents=True)`
+    "ANN003",  # Missing type annotation for `**kwargs`
+    "B007",  # Loop control variable not used within the loop body
+    "ANN204",  # Missing return type annotation for magic method
+    "D417",  # Missing argument descriptions in the docstring
+    "ANN205",  # Missing return type annotation for static method
+    "PLR5501",  # Use `elif` instead of `else` following `if` condition to avoid unnecessary indentation
+    "EM102",  # Exception must not use an f-string literal
+    "D414",  # Section has no content
+    "RUF012",  # Mutable class attributes should be annotated with `typing.ClassVar`
+    "A001",  # Variable `input` is shadowing a Python builtin
+    "A002",  # Argument `input` is shadowing a Python builtin
+    "E741",  # Ambiguous variable name: `l`
+    "PTH120",  # `os.path.dirname()` should be replaced by `Path.parent`
+    "N816",  # Variable `cfFilename` in global scope should not be mixedCase
+    "PTH109",  # `os.getcwd()` should be replaced by `Path.cwd()`
+]
diff --git a/regression/rt-cetsa-moltprot-tool/run-plugin.sh b/regression/rt-cetsa-moltprot-tool/run-plugin.sh
new file mode 100644
index 0000000..bea8c33
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/run-plugin.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+version=$(<VERSION)
+datapath=$(readlink --canonicalize data)
+
+# Inputs (container-side paths); every expansion below is quoted so the shell never glob-expands the pattern
+inpDir=/data/input
+filePattern=".*"
+
+# Output paths
+outDir=/data/output
+
+# Log level, must be one of ERROR, CRITICAL, WARNING, INFO, DEBUG
+LOGLEVEL=INFO
+
+docker run --mount type=bind,source="${datapath}",target=/data/  \
+            --env POLUS_LOG="${LOGLEVEL}" \
+            "polusai/rt-cetsa-moltprot-tool:${version}" \
+            --inpDir "${inpDir}" \
+            --filePattern "${filePattern}" \
+            --outDir "${outDir}"
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/VERSION b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/VERSION
new file mode 100644
index 0000000..bcea87e
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/VERSION
@@ -0,0 +1 @@
+0.3.1-alpha
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
new file mode 100644
index 0000000..6d3ec01
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
@@ -0,0 +1,48 @@
+"""RT_CETSA Moltprot Regression Tool."""
+
+__version__ = "0.1.0"
+
+import os
+import pathlib
+
+import pandas
+
+from . import core
+from . import models
+
+POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".csv")
+
+
+def fit_data(file_path: pathlib.Path) -> pandas.DataFrame:
+    """Run the MoltenProt fit on one CSV file and return the per-well results.
+
+    The returned frame is ``plate_results`` sorted by ascending ``BS_factor``.
+    """
+    # Settings forwarded verbatim to SetAnalysisOptions.
+    options = {
+        "model": "santoro1988",
+        "baseline_fit": 3,
+        "baseline_bounds": 3,
+        "dCp": 0,
+        "onset_threshold": 0.01,
+        "savgol": 10,
+        "blanks": [],
+        "exclude": [],
+        "invert": False,
+        "mfilt": None,
+        "shrink": None,
+        "trim_max": 0,
+        "trim_min": 0,
+    }
+    moltprot = core.MoltenProtFit(filename=file_path, input_type="csv")
+    moltprot.SetAnalysisOptions(**options)
+    moltprot.PrepareData()
+    moltprot.ProcessData()
+
+    return moltprot.plate_results.sort_values("BS_factor")
+
+
+def gen_out_path(file_path: pathlib.Path, out_dir: pathlib.Path) -> pathlib.Path:
+    """Build the result path ``<input stem>_moltprot<POLUS_TAB_EXT>`` inside ``out_dir``."""
+    out_name = f"{file_path.stem}_moltprot{POLUS_TAB_EXT}"
+    return out_dir.joinpath(out_name)
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
new file mode 100644
index 0000000..3c43a9f
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
@@ -0,0 +1,76 @@
+"""CLI for rt-cetsa-moltprot-tool."""
+
+import json
+import logging
+import os
+import pathlib
+
+import filepattern
+import typer
+from polus.tabular.regression.rt_cetsa_moltprot import fit_data
+from polus.tabular.regression.rt_cetsa_moltprot import gen_out_path
+
+# Initialize the logger under this package's own name; POLUS_LOG (default INFO) sets the level
+logging.basicConfig(
+    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
+    datefmt="%d-%b-%y %H:%M:%S",
+)
+logger = logging.getLogger("polus.tabular.regression.rt_cetsa_moltprot")
+logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+    inp_dir: pathlib.Path = typer.Option(
+        ...,
+        "--inpDir", "--inp-dir",  # name used by plugin.json/run-plugin.sh first, previous auto-generated name kept as alias
+        help="Input directory containing the data files.",
+        exists=True,
+        dir_okay=True,
+        readable=True,
+        resolve_path=True,
+    ),
+    pattern: str = typer.Option(
+        ".+",
+        "--filePattern", "--pattern",  # same scheme: documented name plus previous alias
+        help="Pattern to match the files in the input directory.",
+    ),
+    preview: bool = typer.Option(
+        False,
+        help="Preview the files that will be processed.",
+    ),
+    out_dir: pathlib.Path = typer.Option(
+        ...,
+        "--outDir", "--out-dir",  # same scheme: documented name plus previous alias
+        help="Output directory to save the results.",
+        exists=True,
+        dir_okay=True,
+        writable=True,
+        resolve_path=True,
+    ),
+) -> None:
+    """CLI for rt-cetsa-moltprot-tool."""
+    logger.info("Starting the CLI for rt-cetsa-moltprot-tool.")
+    logger.info(f"Input directory: {inp_dir}")
+    logger.info(f"File Pattern: {pattern}")
+    logger.info(f"Output directory: {out_dir}")
+
+    fp = filepattern.FilePattern(inp_dir, pattern)
+    inp_files = [f[1][0] for f in fp()]
+
+    if preview:
+        # pathlib.Path is not JSON serializable, so the preview lists the output paths as plain strings
+        out_json = {"files": [str(gen_out_path(f, out_dir)) for f in inp_files]}
+        with (out_dir / "preview.json").open("w") as fh:
+            json.dump(out_json, fh, indent=2)
+        return
+
+    for f in inp_files:
+        logger.info(f"Processing file: {f}")
+        fit_data(f).to_csv(gen_out_path(f, out_dir), index=True)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
new file mode 100644
index 0000000..31ba0f1
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
@@ -0,0 +1,3780 @@
+"""Copyright 2018-2021 Vadim Kotov, Thomas C. Marlovits.
+
+This file is part of MoltenProt.
+
+MoltenProt is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+MoltenProt is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MoltenProt.  If not, see <https://www.gnu.org/licenses/>.
+"""
+
+### Citation
+# a simple dict with strings that provide different citation formatting
+citation = {
+    "long": "\nIf you found MoltenProt helpful in your work, please cite:\nKotov et al., Protein Science (2021)\ndoi: 10.1002/pro.3986\n",
+    "html": """<p>If you found MoltenProt helpful in your work, please cite: </p>
+                      <p>Kotov et al., Protein Science (2021)</p>
+                      <p><a href="https://dx.doi.org/10.1002/pro.3986">doi: 10.1002/pro.3986</a></p>""",
+    "short": "Citation: Kotov et al., Protein Science (2021) doi: 10.1002/pro.3986",
+}
+
+
+### Modules
+# some useful mathematical functions
+# For printing line number and file name
+from inspect import currentframe
+from inspect import getframeinfo
+
+# for generating htmls
+from string import Template
+
+# plotting
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib import gridspec
+
+# for color conversion
+from matplotlib.colors import rgb2hex
+
+# interpolation
+from scipy.interpolate import interp1d
+
+# fitting routine from scipy
+from scipy.optimize import curve_fit
+
+# median filtering
+from scipy.signal import medfilt
+
+cf = currentframe()
+cfFilename = getframeinfo(cf).filename  # type: ignore[arg-type]
+
+# handling exceptions
+# saving class instances to JSON format
+import json
+
+# creating folders
+import os
+import sys
+
+# function to recognize module versions
+from distutils.version import LooseVersion
+
+# for compression of output JSON
+# for timestamps
+from time import strftime
+
+# data processing
+import pandas as pd
+
+# import the fitting models
+from . import models
+
+# A variable for reliable access to other resources of MoltenProt (e.g. report template)
+__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+
+# MoltenProt is stored in a plain-text file VERSION (also used by setup.py)
+# extract and save it to a variable
+with open(os.path.join(__location__, "VERSION")) as version_file:
+    __version__ = version_file.read().strip()
+
+# get scipy version (some methods may not be available in earlier versions)
+scipy_version = sys.modules["scipy"].__version__
+
+# check if running from a PyInstaller bundle
+if hasattr(sys, "frozen") and hasattr(sys, "_MEIPASS"):
+    from_pyinstaller = True
+else:
+    from_pyinstaller = False
+
+# for parallel processing of some for loops using joblib (may hang up)
+try:
+    from joblib import Parallel
+    from joblib import delayed
+
+    # this is only needed if we want to auto-estimate the available cores - currently done in MoltenProtMain
+    # access module version without importing the whole module:
+    joblib_version = sys.modules["joblib"].__version__
+    # check joblib version: only 0.12 and above can pickle instance methods (which is used in mp)
+    if LooseVersion(joblib_version) >= LooseVersion("0.12"):
+        parallelization = True
+    else:
+        print(
+            "Warning: available joblib version ({}) is incompatible with current code parallelization".format(
+                joblib_version,
+            ),
+        )
+        print("Information: Consider updating to joblib 0.12 or above")
+        parallelization = False
+except ImportError:
+    print("Warning: joblib module not found, parallelization of code is not possible.")
+    joblib_version = "None"  # only for printing version info
+    parallelization = False
+
+# NOTE MoltenProtFit and MoltenProtFitMultiple have different parallelization approaches:
+# MoltenProtFit - can only parallelize figure plotting and n_jobs=3 works well
+# MoltenProtFitMultiple - reads and runs several MoltenProtFit instances in parallel (F330, F350 etc),
+# but each of them gets only one job (i.e. MoltenProtFit.n_jobs is always 1)
+
+### Constants imported from models.py
+R = models.R
+T_std = models.T_std
+
+# Standard plate index
+# NOTE currently the software requires the layout to contain A1-H12 index, even if only a small part is used
+alphanumeric_index = []
+for k in ["A", "B", "C", "D", "E", "F", "G", "H"]:
+    for l in range(1, 13):
+        alphanumeric_index.append(k + str(l))
+# convert index to pandas Series and set its name
+alphanumeric_index = pd.Series(data=alphanumeric_index)
+alphanumeric_index.name = "ID"  # type: ignore[attr-defined]
+
+# dictionary holding the default values and their description for CLI interface/tooltips
+# dictionary key is the name of the option, each entry contains a tuple of default parameter value and its descriptions
+# NOTE for mfilt the default value is only used when the option is supplied in the CLI
+
+# default data preparation parameters
+prep_defaults = {
+    "blanks": [],
+    "blanks_h": "Input the sample ID's with buffer-only control",  # subtract blanks in prep step
+    "exclude": [],
+    "exclude_h": 'Specify the well(s) to omit during analysis; this option is intended for simple removal of some bad wells; if many samples must be excluded, use a layout and add "Ignore" to the annotation of the sample',
+    "invert": False,  # DELETE?
+    "invert_h": "Invert the curve",
+    "mfilt": None,
+    "mfilt_h": "Apply median filter with specificed window size (in temperature units) to remove spikes; 4 degrees is a good starting value",
+    "shrink": None,
+    "shrink_h": "Average the data points to a given degree step;\n may help to make trends more apparent and speeds up the computation;\n typical values 0.5-3.0",
+    "trim_max": 0,
+    "trim_max_h": "Decrease the finishing temperature by this value",
+    "trim_min": 0,
+    "trim_min_h": "Increase the starting temperature by this value",
+}
+
+# default analysis parameters
+analysis_defaults = {
+    "model": "santoro1988",
+    "model_h": "Select a model for describing the experimental data",
+    "baseline_fit": 10,
+    "baseline_fit_h": "The length of the input data (in temperature degrees) for initial estimation of pre- and post-transition baselines",
+    "baseline_bounds": 3,
+    "baseline_bounds_h": "Baseline bounds are set as multiples of stdev of baseline parameters obtained in the pre-fitting routine; this should stabilize the fit and speed up convergence; set to 0 to remove any bounds for baselines",
+    "dCp": 0,
+    "dCp_h": "Heat capacity change of unfolding for all samples (in J/mol/K), used only in equilibrium models; this value overrides the respective column in the layout",
+    "onset_threshold": 0.01,
+    "onset_threshold_h": "Percent unfolded to define the onset of unfolding",
+    "savgol": 10,
+    "savgol_h": "Set window size (in temperature units) for Savitzky-Golay filter used to calculate the derivative",
+}
+
+# all other settings for MoltenProtFit
+defaults = {
+    "debug": False,  # currently not exposed in the CLI
+    "debug_h": "Print developer information to the console",
+    "dec": ".",
+    "dec_h": "CSV decimal separator, enclosed in quotes",
+    "denaturant": "C",
+    "denaturant_h": "For plain CSV input only; specify temperature scale that drives denaturation, in K or C",
+    "j": 1,  # TODO not related to the core functions, supply to respective methods (output etc)
+    "j_h": "Number of jobs to be spawned by parallelized parts of the code; should not be higher than the amount of CPU's in the computer; for most recent laptops a value of 3 is recommended",
+    "layout": None,  # TODO should not be set in SetAnalysisOptions, but rather in __init__
+    "layout_h": "CSV file with layout",
+    "sep": ",",
+    "sep_h": "CSV separator, enclosed in quotes",
+    "readout": "Signal",
+    "readout_h": "For plain CSV input only; specify type of input signal",
+    "spectrum": False,
+    "spectrum_h": "If true, columns in the input CSV will be treated as separate wavelengths of a spectrum",
+    "heatmap_cmap": "coolwarm_r",  # a color-safe heatmap color with red being "bad" (low value)
+    "heatmap_cmap_h": "Matplotlib code for colormap that would be used to color-code heatmaps in reports or images",
+}
+
+# dictionary with available models
+# NOTE to be automatically identified the model has to be subclass or subsubclass of MoltenProtModel
+avail_models = {}
+for model in models.MoltenProtModel.__subclasses__():
+    avail_models[model.short_name] = model  # subclass of MoltenProtModel
+    for submodel in model.__subclasses__():  # subsubclass of MoltenProtModel
+        avail_models[submodel.short_name] = submodel
+
+# add a dummy model to indicate that the dataset should not be analysed
+avail_models["skip"] = "skip"  # type: ignore[assignment]
+
+### Utility functions
+
+
+def normalize(input, new_min=0, new_max=1, from_input=False):
+    """Linearly rescale a pandas Series onto the range [new_min, new_max].
+
+    Parameters
+    ----------
+    input : pd.Series
+        the Series whose values are rescaled
+    new_min, new_max : float
+        lower and upper end of the target range (default [0-1])
+    from_input : bool
+        when True, ignore new_min/new_max and take the target range from the
+        smallest and largest absolute value of the input instead;
+        this is used to perform inversion of curves (mirror around X-axis)
+
+    References:
+    ----------
+    The general formula for normalization is from here:
+    https://en.wikipedia.org/wiki/Normalization_(image_processing)
+    """
+    if from_input:
+        magnitudes = abs(input)
+        new_min, new_max = min(magnitudes), max(magnitudes)
+
+    lowest = input.min()
+    source_span = input.max() - lowest
+    target_span = new_max - new_min
+    return (input - lowest) * target_span / source_span + new_min
+
+
+def to_odd(input, step):
+    """Convert a window length given in temperature units to an odd number of datapoints.
+
+    NOTE for user's simplicity the window size is specified in temperature units
+    for filters, however, an odd integer is needed
+
+    Parameters
+    ----------
+    input : float
+        window length in degrees to convert to a number of datapoints
+    step : float
+        temperature step in the dataset
+
+    Returns:
+    -------
+    int
+        an odd number of datapoints which should more or less fit in the
+        requested temperature range (an even count is increased by one)
+    """
+    # calculate the approximate amount of datapoints corresponding to the range
+    input = input / step
+    # truncate to a whole number of datapoints
+    input = int(input)
+    # if it can be divided by 2 without the remainder (Euclidean division)
+    # then it is even and needs to become odd
+    # % computes the remainder from Euclidean division
+    # the complementary operator is //
+    if not input % 2:
+        input = input + 1
+    return input
+
+
+def analysis_kwargs(input_dict):
+    """Filter a dict down to the options understood by SetAnalysisOptions.
+
+    Only keys present in analysis_defaults or prep_defaults are kept, with
+    their values unchanged; missing options are NOT filled in with defaults.
+    """
+    # the union of both default tables defines the accepted option names
+    known = analysis_defaults.keys() | prep_defaults.keys()
+    return {name: value for name, value in input_dict.items() if name in known}
+
+
+def showVersionInformation():
+    """Print the MoltenProt version, the pandas dependency report and the joblib version."""
+    print(cfFilename, currentframe().f_lineno)  # source file name and the current line number
+    print(f"MoltenProt v. {__version__}")
+    if from_pyinstaller:
+        print("(PyInstaller bundle)")
+    pd.show_versions(as_json=False)  # matplotlib is also there
+    print(f"joblib           : {joblib_version}")
+
+
+### Wrappers
+
+
+def mp_read_excel(filename, sheetname, index_col):
+    """Read one sheet of an XLSX file into a pandas DataFrame, or return None if reading fails.
+
+    Parameters
+    ----------
+    filename
+        filename for reading
+    sheetname
+        which sheet to read from file
+    index_col
+        which column to use as index
+
+    Returns:
+    -------
+    pd.DataFrame from Excel file or None if reading failed (a warning is printed)
+    """
+    try:
+        # NOTE in pandas below 0.24 even though the data should be considered index-free (index_col=None)
+        # the first column was sometimes used as index (corresponds to Time in Prometheus XLSX)
+        # for now explicitly pass some column to use as index
+        return pd.read_excel(filename, sheetname, index_col=index_col)
+    except Exception:
+        # NOTE XLSX parsing backends raise their own error types, so every ordinary error is reported as a missing sheet; KeyboardInterrupt/SystemExit are deliberately not caught
+        print(
+            'Warning: sheet "{}" does not exist in the input file {}'.format(
+                sheetname,
+                filename,
+            ),
+        )
+        return None
+
+
+### JSON I/O functions and variables
+
+
+def serialize(obj):
+    """Helper function for serialization of DataFrames and other non-standard objects.
+
+    Returns a JSON-friendly dict for DataFrame, MoltenProtFit and MoltenProtFitMultiple
+    objects and None for anything else; the instance passed in is left unmodified.
+    """
+    if isinstance(obj, pd.core.frame.DataFrame):
+        # NOTE original data contains 12 decimal digits, while default encoding parameter is 10
+        # pandas calculates with 16 decimal digits (or more?) and in this part there will be certain discrepancy
+        # between the runs (i.e. run analysis>save to json>run analysis again)
+        # in any case the impact on the ultimate result is not measurable
+        return {
+            "DataFrame": obj.to_json(
+                force_ascii=False,
+                double_precision=15,
+                orient="split",
+            ),
+        }
+    elif isinstance(obj, MoltenProtFit):
+        output = dict(obj.__dict__)  # shallow copy: removing keys must not strip attributes from the live instance
+        # drop some method descriptions (only needed for parallel processing)
+        output.pop("plotfig", None)
+        # drop layout attribute, it will be set from parent MPFM when loading back
+        output.pop("layout", None)
+        return {"MoltenProtFit": output}
+    elif isinstance(obj, MoltenProtFitMultiple):
+        output = dict(obj.__dict__)  # shallow copy, same reason as above
+        # a better way to query a dict, see:
+        # https://docs.quantifiedcode.com/python-anti-patterns/correctness/not_using_get_to_return_a_default_value_from_a_dictionary.html
+        # TODO this deletion is probably not needed?
+        if output.get("PrepareAndAnalyseSingle"):
+            del output["PrepareAndAnalyseSingle"]
+        if output.get("WriteOutputSingle"):
+            del output["WriteOutputSingle"]
+        # add version info and timestamp
+        return {
+            "MoltenProtFitMultiple": output,
+            "version": __version__,
+            "timestamp": strftime("%c"),
+        }
+    return None
+
+
+def deserialize(input_dict):
+    """Helper function to deserialize MoltenProtFit instances and DataFrames from JSON files (read into a dict).
+
+    Dispatches on the first truthy key among "MoltenProtFit", "MoltenProtFitMultiple"
+    and "DataFrame"; a dict with none of them is returned unchanged.
+    """
+    if input_dict.get("MoltenProtFit"):
+        # read a MPF instance from the dict
+        # plate_ variables conversion occurs during initialization.
+        # the downstream part of the json (e.g. a single MPFIT instance)
+        output = MoltenProtFit(None, input_type="from_dict")
+        output.__dict__.update(input_dict["MoltenProtFit"])
+        return output
+    elif input_dict.get("MoltenProtFitMultiple"):
+        # this level also has version info and timestamp
+        # NOTE if needed, this section can be used to discard too old JSON sessions
+        if input_dict.get("version") and input_dict.get("timestamp"):
+            print(
+                "Information: JSON session was created with MoltenProt v. {} on {}".format(
+                    input_dict["version"],
+                    input_dict["timestamp"],
+                ),
+            )
+        else:
+            print(
+                "Warning: JSON session contains no version and timestamp, is it an old one?",
+            )
+        # create an empty instance and overwrite its attributes with the stored ones
+        output = MoltenProtFitMultiple()
+        output.__dict__.update(input_dict["MoltenProtFitMultiple"])
+
+        # set layouts in all datasets (the layout is not stored with the individual MoltenProtFit entries)
+        output.UpdateLayout()
+        return output
+    elif input_dict.get("DataFrame"):
+        return pd.read_json(input_dict["DataFrame"], precise_float=True, orient="split")
+    return input_dict
+
+
+### Classes
+
+
+class MoltenProtFit:
+    """Stores and analyses a single dataset.
+
+    Attributes:
+    ----------
+    defaults : dict
+        dictionary holding the default values and their description
+    hm_dic : dict
+        dictionary for important heatmap parameters
+    plate - the workhorse variable, contains the values currently being processed
+    plate_raw - initially imported data, but without invalid wells or the ones excluded by user
+    plate_results - fit values and sort score for individual wells
+    plate_derivative - the derivative curve for raw (?) data
+    plate_fit - the curves computed based on the fit parameters
+    resultfolder - a subfolder created from the file name to write the output (assigned by MoltenProtMain.py)
+    dT - step of temperature
+    xlim - the range for x-axis on RFU(T) plots
+    filename - the name of the input file
+    Lists holding removed wells:
+    bad_fit - ID of wells that could not be fit
+    blanks - blank wells (subtracted prior to analysis) -> now part of SetAnalysisOptions
+    exclude - supplied by user through --exclude option -> now part of SetAnalysisOptions
+    bad_Tm - the melting temperature is out of range of T
+    readout_type - the signal in the assay
+
+    Notes:
+    -----
+    This is an internal class, do not create it manually.
+    All official messages must be sent through print_message() method (enforces selecting message type)
+    Output information style:
+    "Fatal: " - error that causes the program to stop
+    "Warning: " - something may be messed up, but the program can proceed
+    "Information: " any other message, that has nothing to do with the program flow
+    """
+
+    # dictionary for important heatmap parameters:
+    # > which values are good high or low
+    # > what is the title for plot
+    # > what are the tick labels
+    # TODO for Tm, dHm and the like, substitute text info to the min and max numbers
+    # TODO use this as is a general hm_dic for all types of analysis
+    hm_dic = {
+        "S": {
+            "lowerbetter": True,
+            "title": "Std. Error of Estimate Heatmap",
+            "tick_labels": ["Bad fit", "Reference", "Good fit"],
+        },
+        "dHm_fit": {
+            "lowerbetter": False,
+            "title": "Unfolding Enthalpy Heatmap",
+            "tick_labels": ["Low dHm", "Reference", "High dHm"],
+        },
+        "Tm_fit": {
+            "lowerbetter": False,
+            "title": "Melting Temperature Heatmap",
+            "tick_labels": ["Low Tm", "Reference", "High Tm"],
+        },
+        "d_fit": {
+            "lowerbetter": True,
+            "title": "Slope Heatmap",
+            "tick_labels": ["Flat", "Reference", "Steep"],
+        },
+        "Tm_init": {
+            "lowerbetter": False,
+            "title": "Melting Temperature Heatmap",
+            "tick_labels": ["Low Tm", "Reference", "High Tm"],
+        },
+        "T_onset": {
+            "lowerbetter": False,
+            "title": "Onset Temperature Heatmap",
+            "tick_labels": ["Low T_ons", "Reference", "High T_ons"],
+        },
+        "a_fit": {
+            "lowerbetter": True,
+            "title": "Slope Heatmap",
+            "tick_labels": ["Flat", "Reference", "Steep"],
+        },
+        "Tagg_fit": {
+            "lowerbetter": False,
+            "title": "Aggregation Temperature Heatmap",
+            "tick_labels": ["Low Tagg", "Reference", "High Tagg"],
+        },
+        "Tagg_init": {
+            "lowerbetter": False,
+            "title": "Aggregation Temperature Heatmap",
+            "tick_labels": ["Low Tagg", "Reference", "High Tagg"],
+        },
+        "dG_std": {
+            "lowerbetter": False,
+            "title": "Standard Gibbs Free Energy of Unfolding",
+            "tick_labels": ["na", "na", "na"],
+        },
+        "BS_factor": {
+            "lowerbetter": False,
+            "title": "Dimesionless Signal Window",
+            "tick_labels": ["Narrow", "Reference", "Wide"],
+        },
+    }
+
+    def __init__(
+        self,
+        filename,
+        scan_rate=None,
+        denaturant=defaults["denaturant"],
+        sep=defaults["sep"],
+        dec=defaults["dec"],
+        debug=defaults["debug"],
+        input_type="csv",
+        parent_filename="",
+        readout_type="Signal",
+    ) -> None:
+        """Parameters
+        ----------
+        filename
+            for csv files the file to parse, when MPF is made by MPFM this will be substituted to the filename used to create MPFM
+        scan_rate
+            scan rate in degrees per min (required for kinetic models)
+        denaturant
+            temperature (C or K) or chemical (under construction)
+        sep,dec
+            csv import parameters
+        debug
+            print additional info
+        input_type
+            defines where the data is coming from:
+                > csv - default value, corresponds to the standard csv file
+                > from_xlsx - means that the instance of MoltenProtFit is being created by MoltenProtFitMultiple, input will be a ready-to-use DataFrame
+        parent_filename
+            the filename of the original xlsx file
+        readout_type
+            the name of the readout (for plot Y-axis)
+
+        Notes:
+        -----
+        """
+        # set debugging mode: if the __version__ is "git", then this is a development file
+        # and debugging is done by default. In all other cases, it is False
+        # TODO add debug to CLI interface
+        if __version__[:3] == "git":
+            self.debug = True
+        else:
+            self.debug = debug
+
+        if input_type == "from_dict":
+            # this would return an empty object
+            # all manipulations are done by deserialize function
+            pass
+        else:
+            # NOTE the logic of overwriting etc is handled by CLI/GUI code
+            self.resultfolder = None
+            # attribute to hold diagnostic messages.
+            self.protocolString = ""
+            self.scan_rate = scan_rate
+            self.fixed_params = None
+            # load the dataset into the plate variable
+            if input_type == "csv":
+                try:
+                    self.plate = pd.read_csv(
+                        filename,
+                        sep=sep,
+                        decimal=dec,
+                        index_col="Temperature",
+                        encoding="utf-8",
+                    )
+                except ValueError:
+                    self.print_message("Input *.csv file is invalid!", "f")
+                    print(
+                        "Please check if column called 'Temperature' exists and separators are specified correctly.",
+                    )
+                # the filename is needed for report title
+                self.filename = filename
+            elif input_type == "from_xlsx":
+                self.plate = filename
+                # BUG this is only needed for report generation, a dummy value
+                # Ideally, it should be the name of parent xlsx file
+                self.filename = parent_filename
+
+            # If requested, convert temperature of input files to Kelvins
+            if denaturant == "K":
+                # input temperature is already in internal units
+                self.denaturant = "K"
+            elif denaturant == "C":
+                # convert Celsius to Kelvins
+                self.plate.index = self.plate.index + 273.15
+                self.denaturant = "K"
+            elif denaturant == "F":
+                msg = "NO Fahrenheit, please!"
+                raise ValueError(msg)
+            else:
+                # if temperature was not recognized, set denaturant to chemical type
+                # NOTE currently not implemented in MoltenProtFit
+                print(f"Assuming that {denaturant} is a chemical denaturant")
+                self.denaturant = denaturant
+                msg = "Chemical denaturation not implemented yet"
+                raise NotImplementedError(msg)
+
+            # save a copy of raw data
+            self.plate_raw = self.plate.copy()
+
+            # compute the (average) step of Temperature (required for conversion of temperature ranges to sets of datapoints)
+            self.dT = (max(self.plate.index) - min(self.plate.index)) / (
+                len(self.plate.index) - 1
+            )
+
+            # for plots only: compute xlim from Temperature range
+            self.xlim = [min(self.plate.index) - 5, max(self.plate.index) + 5]
+
+            # currently only for plots: give a proper label to Y-axis
+            self.readout_type = readout_type
+
+            self.bad_fit = []  # type: ignore[var-annotated]
+            self.bad_Tm = []  # type: ignore[var-annotated]
+
+    def __getstate__(self):
+        """A special method to enable pickling of class methods (for parallel exectution)."""
+        output = self.__dict__
+        output["plotfig"] = self.plotfig
+        return output
+
+    def converter96(self, use_column, reference=None):
+        """Reads self.plate_results with well ID in the index (A1, A2 etc) and some values in columns (Tm_fit, etc) and returns a DataFrame emulating a 96-well plate where each well has a normalized respective column value.
+
+        Parameters
+        ----------
+        use_column
+            which column to use in self.plate_results
+        reference
+            well to use as reference
+
+        Notes:
+        -----
+        Normalization is done based on lowerbetter information from self.hm_dic:
+        1 - means the highest possible value and the best value as well
+        0 - means the worst and the lowest possible value
+
+        If a reference is supplied, it is subtracted from the data
+        Reference code _was not maintained_ for a while and is probably faulty!
+        """
+        # create a DataFrame emulating a 96-well plate
+        output = pd.DataFrame(
+            index=["A", "B", "C", "D", "E", "F", "G", "H"],
+            columns=list(range(1, 13)),
+        )
+
+        # check if the use_column value is a valid one
+        self.print_message(
+            "Creating heatmap for column {}".format(
+                self.plate_results[use_column].name,
+            ),
+            "i",
+        )
+        for i in output.index:
+            for j in output.columns:
+                a = i + str(j)
+                if (use_column in self.plate_results.columns) and (
+                    a in self.plate_results.index
+                ):
+                    output.loc[[i], [j]] = self.plate_results.loc[a, use_column]
+
+        # if reference value is supplied, subtract it from the values
+        if reference is not None:
+            try:
+                # probe if the well is OK (not NaN), that's easier to do with the input DataFrame
+                output = output - self.plate_results[use_column][reference]
+            except KeyError:
+                self.print_message(
+                    "Supplied reference is invalid, creating a reference-free heatmap",
+                    "w",
+                )
+
+        # invert the values if lowerbetter=true
+        # just a precaution against outdated self.hm_dic
+        try:
+            if self.hm_dic[use_column]["lowerbetter"]:
+                output = output - max(output.max())
+                output = output * -1
+        except KeyError:
+            self.print_message(
+                "{} was not found in hm_dic, the colors may be wrong!".format(
+                    use_column,
+                ),
+                "w",
+            )
+
+        # bring everything to range 0-1
+        # normalize by max value (this way we make sure that Nans changed to 1000 are out of range)
+        # NOTE in some rare cases, when there is only one sample left and it has value 0 in plate96
+        min_val = output.min().min()
+        max_val = output.max().max()
+        if min_val == max_val:
+            self.print_message(
+                "Only one sample left after pre-processing and fitting!",
+                "w",
+            )
+            output = output * 0
+        else:
+            output = (output - min_val) / (max_val - min_val)
+        # make all Nan's equal to 1000
+        output.fillna(1000, inplace=True)
+        return output
+
+    def heatmap(
+        self,
+        output_path,
+        plate96,
+        use_column,
+        heatmap_cmap=defaults["heatmap_cmap"],
+        lowerbetter=False,
+        title="Result Heatmap",
+        tick_labels=["Unstable", "Reference", "Stable"],
+        pdf_report=False,
+        save=True,
+    ):
+        """Create a heatmap.
+
+        Parameters
+        ----------
+        output_path
+            where to save the image
+        plate96
+            dataframe created with method converter96
+        use_column
+            which curve parameter to use for heatmap
+        heatmap_cmap
+            matplotlib colormap for heatmap
+        lowerbetter
+            indicated if lower values of a parameters correspond to higher stability (e.g. S)
+        title
+            the title of the heatmap
+        tick_labels
+            how to label the colorbar
+        pdf_report
+            if True, will only return a figure object (and size will be adjusted to meet A4)
+        save
+            if True, save image to disk
+        """
+        # tweak colormap to have outside values colored as gray
+        cmap = plt.get_cmap(heatmap_cmap).copy()
+        cmap.set_over("0.5")
+
+        # check if there are negative values
+        # (plate96_sort<0).any() returns a Series with True/False indicating presence of negative values in respective columns
+        # we then use any() for this series to get the True statement. Very weird way to do it indeed.
+        if ((plate96 < 0).any()).any():
+            # negative values are present, so we're dealing with ref-based data set
+
+            # now check if we are dealing with higherbetter or lowerbetter values
+            if lowerbetter:
+                # we have to invert sign so that all values that are less than the reference are positive
+                plate96 = plate96 * -1
+
+            # Normalize positive values by the highest value and negative values by the lowest
+            # this is not fully correct because the slope of color(value) dependence line is different for values
+            # below zero and above zero. Other ways to do it are too hacky anyway
+            plate96[plate96 > 0] = plate96[plate96 > 0] / max(plate96.max())
+            plate96[plate96 < 0] = plate96[plate96 < 0] / abs(min(plate96.min()))
+            vmin = -1
+            tick_values = [-1, 0, 1]
+        else:
+            vmin = min(plate96.min())
+            tick_values = [vmin, 1]
+            # this format means how many decimal digits is allowed
+            # set tick labels from use_column
+            col_min = f"{self.plate_results[use_column].min():.3f}"
+            col_max = f"{self.plate_results[use_column].max():.3f}"
+            tick_labels = [col_max, col_min] if lowerbetter else [col_min, col_max]
+
+        # Making not available values gray (e.g. bad fit or blanks)
+        # either convert pd to numpy array and make a mask, or
+        # create figure canvas
+        # A4 is 8.3 x 11.7 inches, for report the whole page is needed
+        # for other outputs we need half the hight (i.e. A5 in landscape orientation)
+        if pdf_report:
+            fig, ax = plt.subplots(3, 1, figsize=(8.3, 11.7))
+            cbar_shrink = 0.3
+            cbar_orient = "horizontal"
+            heatmap_axis = ax[0]
+            axis_aspect = ["auto"]
+        else:
+            fig = plt.figure(figsize=(8.3, 11.7 / 2), tight_layout=True)
+            cbar_shrink = 8 / 12
+            cbar_orient = "vertical"
+            heatmap_axis = fig.gca()
+            axis_aspect = ["equal", "box"]
+        fig.suptitle(
+            title,
+            fontweight="bold",
+        )
+        # create the heatmap
+        # NOTE in some rare cases when the heatmap consists of a single sample matplotlib will raise a warning:
+        """
+        RuntimeWarning: invalid value encountered in less_equal
+        b = b[(b <= intv[1] + eps) & (b >= intv[0] - eps)]
+        RuntimeWarning: invalid value encountered in greater_equal
+        b = b[(b <= intv[1] + eps) & (b >= intv[0] - eps)]
+        """
+        c = heatmap_axis.pcolor(plate96, edgecolors="k", cmap=cmap, vmin=vmin, vmax=1)
+
+        # cycle through all wells and write there the ID
+        for i in plate96.index:
+            for j in plate96.columns:
+                x = plate96.columns.get_loc(j)
+                y = plate96.index.get_loc(i)
+                heatmap_axis.text(
+                    x + 0.5,
+                    y + 0.5,
+                    i + str(j),
+                    horizontalalignment="center",
+                    verticalalignment="center",
+                )
+
+        # y axis has to be inverted so that well A1 is in top left corner
+        heatmap_axis.invert_yaxis()
+        # addtional hacks: enforce square size of wells, hide axes and ticks
+        heatmap_axis.set_aspect(*axis_aspect)
+        heatmap_axis.axis("off")
+        # create a colorbar with text labels
+        cbar = fig.colorbar(
+            c,
+            ax=heatmap_axis,
+            ticks=tick_values,
+            shrink=cbar_shrink,
+            orientation=cbar_orient,
+        )
+        # set colorbar ticks depending on the requested orientation
+        if cbar_orient == "horizontal":
+            cbar.ax.set_xticklabels(tick_labels)
+        elif cbar_orient == "vertical":
+            cbar.ax.set_yticklabels(tick_labels)
+        # label is not needed, because it will be in the figure title
+
+        if pdf_report:
+            # return the figure object for subsequent manipulations
+            return (fig, ax)
+
+        if save:
+            plt.savefig(
+                os.path.join(output_path, "heatmap_" + str(use_column) + ".png"),
+                dpi=(200),
+                tight_layout=True,
+            )
+        # clean up after plotting so that no parameters are carried over to genpics
+        plt.close("all")
+        return None
+
+    def print_message(self, text, message_type):
+        """Prints messages and saves them to protocolString.
+
+        Parameters
+        ----------
+        text
+            message text
+        message_type : i/w/f
+            type of message (Information, Warning, Fatal)
+        """
+        # line No and file name are only printed in debug mode and only for Fatals and Warnings
+        if self.debug and (message_type != "i"):
+            cf = currentframe()
+            cfFilename = getframeinfo(cf).filename
+            print(f"Line {cf.f_back.f_lineno}, in file {cfFilename}")
+        if message_type == "f":
+            print("Fatal: " + text + f" ({self.readout_type})")
+            sys.exit(1)
+        elif message_type == "w":
+            msg = "Warning: " + text + f" ({self.readout_type})"
+            print(msg)
+            msg = msg + "\n"
+            self.protocolString = self.protocolString + msg
+        elif message_type == "i":
+            msg = "Information: " + text + f" ({self.readout_type})"
+            print(msg)
+            msg = msg + "\n"
+            self.protocolString = self.protocolString + msg
+        else:
+            raise ValueError(f"Unknown message type '{message_type}'")
+
+    def _calculate_raw_corr(self):
+        """Compute baseline-corrected raw data; requires presence of kN, bN, kU, bU in plate_results.
+
+        Notes:
+        -----
+        calculate fraction unfolded (funf)
+        How to derive the formula:
+        K_eq(T) = (Fn(T) - F(T)) / (F(T) - Fu(T))
+        K_eq() = funf/fnat = funf / (1-funf)
+        funf = (Fn(T) - F(T)) / (Fn(T) - Fu(T))
+        where T is temperature, F(T) assay signal, Fu and Fn - baseline signal
+        funf - fraction unfolded; fnat - fraction native
+        whichever formula is used for calculation, the result is the same
+
+        TODO: the same calculation using plate_fit can be done to yield "fit" variant of funf
+        """
+        # initiate an empty DataFrame
+        self.plate_raw_corr = pd.DataFrame(index=self.plate.index)
+
+        # a helper function for pandas.apply()
+        def calculate_raw_corr_series(input_series):
+            # NOTE the badly fitted samples will be present in the column names of self.plate
+            # but they are not present in the index of self.plate_results, thus we need to check for it
+            # Obviously the calculation cannot be done for the curves that could not be fit
+            if input_series.name in self.plate_results.index:
+                # here we need transposed plate_results to have sample ID's in the columns
+                output = (
+                    input_series.index * self.plate_results["kN_fit"][input_series.name]
+                    + self.plate_results["bN_fit"][input_series.name]
+                    - input_series
+                ) / (
+                    input_series.index * self.plate_results["kN_fit"][input_series.name]
+                    + self.plate_results["bN_fit"][input_series.name]
+                    - input_series.index
+                    * self.plate_results["kU_fit"][input_series.name]
+                    - self.plate_results["bU_fit"][input_series.name]
+                )
+                # calculation above produces an index object, which we have to convert to a pd.Series
+                output = pd.Series(
+                    output,
+                    index=self.plate.index,
+                    name=input_series.name,
+                )
+                self.plate_raw_corr = pd.concat(
+                    [self.plate_raw_corr, pd.Series(output)],
+                    axis=1,
+                )
+
+        self.plate.apply(calculate_raw_corr_series)
+
+    def _estimate_baseline(self, input_series, fit_length, estimate_Tm=False):
+        """Estimates pre- and post-transition baselines for a series
+        function for a single Series (index is Temperature, name is sample ID, values are RFU's).
+
+        Parameters
+        ----------
+        input_series
+            pd.Series with Temperature in index, name is sample ID, values are the signal
+        fit_length
+            number of degrees to be used from the start or end of data for fitting
+        estimate_Tm
+            additionally estimate melting temperature Tm with a heuristic
+        """
+        # convert fit length in temperature degrees to datapoint number (using self.dT)
+        fit_datapoints = int(fit_length / self.dT)
+        # NOTE do not use Nan's to prevent issues during fitting
+        pre_fit, pre_covm = np.polyfit(
+            input_series.dropna().iloc[:fit_datapoints].index,
+            input_series.dropna().iloc[:fit_datapoints],
+            1,
+            cov=True,
+        )
+        post_fit, post_covm = np.polyfit(
+            input_series.dropna().iloc[-fit_datapoints:].index,
+            input_series.dropna().iloc[-fit_datapoints:],
+            1,
+            cov=True,
+        )
+        self.plate_results.loc[["kN_init"], [input_series.name]] = pre_fit[0]
+        self.plate_results.loc[["bN_init"], [input_series.name]] = pre_fit[1]
+        self.plate_results.loc[["kU_init"], [input_series.name]] = post_fit[0]
+        self.plate_results.loc[["bU_init"], [input_series.name]] = post_fit[1]
+        pre_stdev = np.sqrt(np.diagonal(pre_covm))
+        post_stdev = np.sqrt(np.diagonal(pre_covm))
+        # estimate_Tm stdev of each parameter and write it to plate_results_stdev (used to set bounds)
+        self.plate_results_stdev.loc[["kN_init"], [input_series.name]] = pre_stdev[0]
+        self.plate_results_stdev.loc[["bN_init"], [input_series.name]] = pre_stdev[1]
+        self.plate_results_stdev.loc[["kU_init"], [input_series.name]] = post_stdev[0]
+        self.plate_results_stdev.loc[["bU_init"], [input_series.name]] = post_stdev[1]
+
+        if estimate_Tm:
+            # now the Tm part - find the maximum of smoothened derivative for S-shaped curves (low-to-high)
+            # or the minimum for Z-shaped ones
+            # intersection of the difference line with zero -> intersection of baselines
+            dintersect = -(post_fit[1] - pre_fit[1]) / (post_fit[0] - pre_fit[0])
+            # min/max/middle temperature range
+            tmin = min(input_series.index)
+            tmax = max(input_series.index)
+            tmid = (tmin + tmax) / 2
+            # value of baseline difference at tmid
+            b_diff = (post_fit[0] - pre_fit[0]) * tmid + (post_fit[1] - pre_fit[1])
+
+            if (dintersect > tmin + fit_length) and (dintersect < tmax - fit_length):
+                # NOTE rule out intersecting baselines - in this case post-baseline is not always
+                # above or below the pre-baseline
+                self.plate_results.loc["Tm_init", input_series.name] = tmid
+            else:
+                if b_diff > 0:
+                    # low-to-high curve - use max of the deriv as Tm_init
+                    self.plate_results.loc[
+                        "Tm_init",
+                        input_series.name,
+                    ] = self.plate_derivative[input_series.name].idxmax()
+                elif b_diff < 0:
+                    # high-to-low curve - use min of the deriv as Tm_init
+                    self.plate_results.loc[
+                        "Tm_init",
+                        input_series.name,
+                    ] = self.plate_derivative[input_series.name].idxmin()
+                else:
+                    # rare case - curves are identical raise a warning and use mid-range
+                    self.print_message(
+                        "Baselines are identical in sample {}".format(
+                            input_series.name,
+                        ),
+                        "w",
+                    )
+                    self.print_message(
+                        "Using the middle of temperature range as Tm_init",
+                        "i",
+                    )
+                    self.plate_results.loc["Tm_init", input_series.name] = tmid
+
+    def _calc_Tons(self, Tm_col, dHm_col, onset_threshold):
+        """Computes onset temperature Tons based on supplied column names with dHm and Tm
+        and adds the value to plate_results.
+
+        Parameters
+        ----------
+        Tm_col
+            column with Tm values (Tm_fit, Tm1_fit, etc)
+        dHm_col
+            column with slope values
+        onset_threshold
+            fraction unfolded that corresponds to onset of unfolded (e.g. 0.01 - 1% must be unfolded)
+        """
+        self.plate_results["T_onset"] = 1 / (
+            1 / self.plate_results[Tm_col]
+            - R
+            / self.plate_results[dHm_col]
+            * np.log(onset_threshold / (1 - onset_threshold))
+        )
+
+        # also calculate stdev for T_onset using error propagation from Tm_fit and dHm_fit
+        self.plate_results_stdev["T_onset"] = (
+            np.sqrt(
+                (self.plate_results_stdev[Tm_col] / self.plate_results[Tm_col]) ** 2
+                + (self.plate_results_stdev[dHm_col] / self.plate_results[dHm_col]) ** 2
+                + (
+                    np.sqrt(
+                        self.plate_results_stdev[dHm_col] ** 2
+                        + (
+                            R
+                            * np.log(onset_threshold / (1 - onset_threshold))
+                            * self.plate_results_stdev[Tm_col]
+                        )
+                        ** 2,
+                    )
+                    / (
+                        self.plate_results[Tm_col]
+                        - R
+                        * np.log(onset_threshold / (1 - onset_threshold))
+                        * self.plate_results[Tm_col]
+                    )
+                )
+                ** 2,
+            )
+            * self.plate_results.T_onset
+        )
+
+    def plotfig(
+        self,
+        output_path,
+        wellID,
+        datatype="overview",
+        save=True,
+        show=False,
+        data_ax=None,
+        vline_legend=False,
+    ):
+        """Plot the curves from individual wells.
+        Creates two subplots - top the fit + data, lower - derivative.
+
+        Parameters
+        ----------
+        output_path
+            where to write the file (can be also a dummy value)
+        wellID
+            from which well to plot
+        datatype
+            what is being plotted:
+                > overview - plot experimental data , fit data and some fit parameters
+                > very_raw - a workaround parameter plotting from plate attribute; made for the GUI when the data is loaded, but not processed yet
+                > raw - plot unprocessed data
+                > normalized - data after preprocessing
+                > derivative - first derivative
+                > fitted - based on the equation
+        save
+            actually save the file
+        show
+            show image instead
+        data_ax
+            instead of creating new ones, plot in these axes (for PDF reports: disables derivative plot and legend plot)
+        vline_legend
+            if True then all vlines will be added to the legend (looks bad when individual images are saved)
+        """
+        if data_ax is None:
+            # create the figure object
+            fig = plt.figure(1, figsize=(8, 7))
+            # create a specification for the relative plot sizes
+            gs = gridspec.GridSpec(3, 1, height_ratios=[4, 2, 0.05], figure=fig)
+            # get objects of individual subplots
+            data_ax = fig.add_subplot(gs[0])  # experimental data, fit, etc
+            deriv_ax = fig.add_subplot(gs[1])  # the derivative
+        else:
+            data_ax = data_ax
+            deriv_ax = None
+
+        # NOTE currently all internal manipulations are done in K
+        # and conversion back to original scale is not done
+        if self.denaturant == "K" or self.denaturant == "C":
+            degree_sign = "K"
+        else:
+            # NOTE this is a placeholder for chemical denaturant scale
+            pass
+
+        # a carry-over from the cycle
+        i = wellID
+
+        if datatype == "overview":
+            # plot the fit
+            # plot the experimental data
+            # TODO use markevery=n to plot every n-th datapoint
+            # format used to be kx, however, for bigger datasets this doesn't look good
+            data_ax.plot(
+                self.plate[i].index.values,
+                self.plate[i],
+                "k.",
+                mew=1,
+                label="Experiment",
+            )  # , markevery=40)
+
+            data_ax.plot(
+                self.plate[i].index.values,
+                self.plate_fit[i],
+                label="Fit",
+            )
+
+            # a label for Y-axis
+            data_ax.set_ylabel(self.readout_type)
+
+            # calculate the offsets for the plot based on the overall length
+            max_val = max(self.plate[i].dropna())
+            min_val = min(self.plate[i].dropna())
+            y_range = max_val - min_val
+            data_ax.set_ylim([min_val - 0.1 * y_range, max_val + 0.1 * y_range])
+
+            # force specific range for x-axis
+            data_ax.set_xlim(self.xlim)
+        else:
+            # HACK hide the derivative plot
+            if deriv_ax is not None:
+                deriv_ax.set_visible(False)
+            # set the source dataframe based on the supplied option
+            if datatype == "very_raw":
+                sourcedf = self.plate
+                ylabel = self.readout_type
+            elif datatype == "raw":
+                sourcedf = self.plate_raw
+                ylabel = self.readout_type
+            elif datatype == "normalized":
+                # after all processing normalized curves are in the plate variable
+                sourcedf = self.plate
+                ylabel = self.readout_type
+            elif datatype == "derivative":
+                sourcedf = self.plate_derivative
+                ylabel = "dRFU/dT"
+            elif datatype == "fitted":
+                sourcedf = self.plate_fit
+                ylabel = "Fitted RFU"
+            else:
+                self.print_message("Invalid plotting source requested", "w")
+
+            if datatype == "derivative":
+                # for derivative plots plot all values
+                plt.plot(sourcedf[i].index.values, sourcedf[i], label=i)
+            else:
+                data_ax.plot(
+                    sourcedf[i].index.values,
+                    sourcedf[i],
+                    "k.",
+                    mew=1,
+                    label="Experiment",
+                )
+            data_ax.set_ylabel(ylabel)
+
+        data_ax.set_xlabel(f"Temperature, {degree_sign}")
+        data_ax.set_title("Sample " + str(i), fontsize=12, y=1.05)
+        data_ax.grid(True, which="both")
+
+        # commands specific only to overview mode:
+        if datatype == "overview":
+            # plot the determined baselines
+            # create np.poly1d objects with respective fit parameters
+            # TODO for line just 2 points are needed...
+            poly_pre = np.poly1d(
+                *self.plate_results.loc[[i], ["kN_fit", "bN_fit"]].values,
+            )
+            poly_post = np.poly1d(
+                *self.plate_results.loc[[i], ["kU_fit", "bU_fit"]].values,
+            )
+            data_ax.plot(
+                self.plate[i].index.values,
+                poly_post(self.plate[i].index.values),
+                label="Post- baseline",
+                linestyle="--",
+            )
+            data_ax.plot(
+                self.plate[i].index.values,
+                poly_pre(self.plate[i].index.values),
+                label="Pre- baseline",
+                linestyle="--",
+            )
+
+            # visualization of the lines requested (specific to different model types)
+            # NOTE this would not print the value/stdev on the plot, has to be done separately
+            for parameter_name in self.plotlines:
+                if vline_legend:
+                    data_ax.axvline(
+                        self.plate_results[parameter_name][i],
+                        ls="dotted",
+                        c="b",
+                        lw=3,
+                        label=parameter_name,
+                    )
+                else:
+                    # NOTE in this case lines are not labeled so that they are not listed in the legend
+                    data_ax.axvline(
+                        self.plate_results[parameter_name][i],
+                        ls="dotted",
+                        c="b",
+                        lw=3,
+                    )
+                    # add text with the parameter used to generate the line - doesn't look nice in some caes
+                    data_ax.text(
+                        self.plate_results[parameter_name][i],
+                        data_ax.get_ylim()[0] + 0.05 * y_range,
+                        " " + parameter_name,
+                        fontsize=12,
+                    )
+            if deriv_ax is not None:
+                fig.legend(loc="lower center", ncol=4, fontsize=12)
+                # commands for derivative plot (used only in overview mode)
+                deriv_ax.plot(
+                    self.plate[i].index.values,
+                    self.plate_derivative[i],
+                    color="k",
+                )
+                deriv_ax.set_xlabel(f"Temperature, {degree_sign}")
+                deriv_ax.set_ylabel(f"d({self.readout_type})/dT")
+
+                # delete the X-label on the data axes
+                data_ax.set_xlabel("")
+
+                # xlim for derivative plot and data plot must be the same!
+                deriv_ax.set_xlim(self.xlim)
+                deriv_ax.grid(True, which="both")
+
+        if deriv_ax is None:
+            # if data_ax was provided externally, then showing/saving should not be done
+            return
+        if show:
+            plt.show()
+        elif save:
+            plt.savefig(output_path + "/" + str(i) + ".png", dpi=(100))
+        plt.close("all")
+
+    ## internal methods for creating HTML report elements
+    def html_heatmap(self, heatmap_cmap, display):
+        """Build the HTML div-block with a 96-well heatmap of the sortby parameter (the last column in self.plate_results).
+
+        Parameters
+        ----------
+        heatmap_cmap
+            matplotlib colormap for heatmap
+        display
+            whether the heatmap is shown or not in the final HTML:
+                > table (standard view)
+                > none (not visible)
+                > block (compact view)
+
+        Returns:
+        -------
+        a string with the HTML code of the heatmap
+
+        Notes:
+        -----
+        The heatmap that the user sees when opening the HTML will have display=table, all other will start as display=none
+        """
+        # template of a single well; the cells are wrapped into rows further below
+        # NOTE when gray cells are clicked a window still pops up but says that there is no such image
+        # due to limitations of possible attributes within html, the possible layout info is stored in the
+        # title attribute which also gets displayed as a tooltip (and will be also shown in the bottom of the heatmap)
+        cell_tmpl = Template(
+            '        <div class="Cell" onmouseover="mouseOver(this.id, this.title)" title="$CONDITION" id="$ID" onclick="window.open(window.currentHeatmap + \'_resources/$ID.png\',\'Sample $ID\', \'width=450,height=450\')" style="background-color:$COLOR">$ID</div>\n',
+        )
+
+        # template of a plate row
+        # TODO add extra spaces for readability of output HTML file
+        row_tmpl = Template('<div class="Row" id=$ROWNAME>\n$SAMPLES</div>')
+
+        # by convention the model-specific final sorting parameter is stored in the last column
+        # TODO add support creating HTML heatmaps for an arbitrary column
+        sort_column = self.plate_results.iloc[:, -1]
+        # the normalized values are passed through the matplotlib colormap and converted to HEX colors
+        cmap = plt.get_cmap(heatmap_cmap)
+        hex_colors = normalize(sort_column).apply(
+            lambda value: rgb2hex(cmap(value)),
+        )
+
+        # outer cycle creates rows A-H, inner cycle creates the wells 1-12 of a row
+        html_rows = []
+        for row_letter in "ABCDEFGH":
+            cells = []
+            for column_number in range(1, 13):
+                sample_id = row_letter + str(column_number)
+                # NOTE modify this line to add additional information (e.g. Tm) to the text under heatmap
+                condition = self.layout["Condition"][sample_id]
+                # wells without a color are gray (lightgray blends with mid-range of coolwarm)
+                if sample_id in hex_colors.index:
+                    cell_color = hex_colors[sample_id]
+                else:
+                    cell_color = "gray"
+                cells.append(
+                    cell_tmpl.substitute(
+                        ID=sample_id,
+                        COLOR=cell_color,
+                        CONDITION=condition,
+                    ),
+                )
+            html_rows.append(
+                row_tmpl.substitute(ROWNAME="Row_" + row_letter, SAMPLES="".join(cells)),
+            )
+
+        # the finished rows are wrapped into an outer table that controls visibility and holds the title
+        wrapper = Template(
+            '<div class="Table" id=$IDENTIFIER style="display:$DISPLAY">\n    <div class="Title">\n        <p>$TITLE_TEXT</p>\n        <i style="font-size:0.8em; font-weight:normal"> Click on the wells to open plots in a separate window </i>\n    </div>\n    $HEATMAP\n</div>',
+        )
+        title_text = (
+            f"{self.readout_type}: heatmap of {sort_column.name} "
+            f"(model <i>{self.model}</i>)"
+        )
+        return wrapper.substitute(
+            DISPLAY=display,
+            IDENTIFIER=self.readout_type,
+            TITLE_TEXT=title_text,
+            HEATMAP="".join(html_rows),
+        )
+
+    def html_button(self):
+        """Build the HTML <input> button that switches the report to the heatmap of this readout."""
+        # self.readout_type is used both in the tooltip and as the value handed over to openHeatmap()
+        markup = '<input title="Switch readout to $IDENTIFIER" type="button" onclick="openHeatmap(this.value)" value="$IDENTIFIER" style="float:left" />\n'
+        return Template(markup).substitute(IDENTIFIER=self.readout_type)
+
+    ## Methods for GUI communication
+    def printAnalysisSettings(self):
+        """Print each analysis setting with its current and default value.
+
+        The settings are the keys of analysis_defaults; keys ending in "_h" hold
+        help messages and are skipped. Extra newlines are printed at the end.
+        """
+        for setting, def_value in analysis_defaults.items():
+            # help message entries are not settings
+            if setting.endswith("_h"):
+                continue
+            current_value = getattr(self, setting)
+            print(
+                f"{setting} = {current_value} (default: {def_value})",
+            )
+        print("\n")
+
+    def analysisHasBeenDone(self):
+        """Tell whether the analysis was completed, i.e. the instance has a plate_results attribute."""
+        return "plate_results" in vars(self)
+
+    def testWellID(self, wellID, ignore_results=False):
+        """Tell whether a sample ID is known to this dataset.
+
+        Parameters
+        ----------
+        wellID
+            sample ID to check
+        ignore_results: bool
+            if True, look the ID up among the columns of self.plate_raw even if
+            self.plate_results exists
+
+        Returns:
+        -------
+        True if wellID is in the index of self.plate_results (when it exists and
+        ignore_results is False) or in the columns of self.plate_raw; else False
+        """
+        # raw data is the reference when requested or when no analysis was done yet
+        if ignore_results or "plate_results" not in self.__dict__:
+            return wellID in self.plate_raw.columns
+        return wellID in self.plate_results.index
+
+    def getResultsColumns(self):
+        """Return a list of self.plate_results column names relevant for the GUI, or None if there are no results."""
+        if "plate_results" not in self.__dict__:
+            self.print_message("No plate_results attribute found", "w")
+            self.print_message("Please perform analysis first", "i")
+            return None
+        # the name of the last column goes first, followed by the entries of self.plotlines
+        output = [self.plate_results.iloc[:, -1].name]
+        output.extend(self.plotlines)
+        # BS-factor is more useful than S, but not always available
+        if "BS_factor" in self.plate_results.columns:
+            output.append("BS_factor")
+        else:
+            output.append("S")
+        return output
+
+    ## Big methods for data input, processing and output
+    def SetAnalysisOptions(
+        self,
+        model=analysis_defaults["model"],
+        baseline_fit=analysis_defaults["baseline_fit"],
+        baseline_bounds=analysis_defaults["baseline_bounds"],
+        dCp=analysis_defaults["dCp"],
+        onset_threshold=analysis_defaults["onset_threshold"],
+        savgol=analysis_defaults["savgol"],
+        # these are pre-processing options (defaults stored in a different dict)
+        blanks=prep_defaults["blanks"],  # TODO set in layout instead?
+        exclude=prep_defaults["exclude"],  # TODO set in layout instead?
+        invert=prep_defaults["invert"],
+        mfilt=prep_defaults["mfilt"],
+        shrink=prep_defaults["shrink"],
+        trim_max=prep_defaults["trim_max"],
+        trim_min=prep_defaults["trim_min"],
+        # TODO layouts must be handled somewhere else...
+        layout=None,
+        layout_input_type="csv",
+    ):
+        """Store all analysis-related parameters in the instance for later use by PrepareData() and ProcessData().
+
+        For parameter description see the analysis_defaults and prep_defaults dicts; layout, layout_input_type
+        and dCp are forwarded to SetLayout(), and the bad fit list (self.bad_fit) is reset.
+
+        Median filter reference (how it works, why it may be better than the mean): https://homepages.inf.ed.ac.uk/rbf/HIPR2/median.htm
+        """
+        self.model = model
+        self.baseline_fit = baseline_fit
+        self.baseline_bounds = baseline_bounds
+        # onset_threshold is used in santoro1988(d) to obtain values of T_onset (the argument used to be ignored)
+        self.onset_threshold = onset_threshold
+        # the value for savgol window to compute the derivative
+        self.savgol = savgol
+
+        self.blanks = blanks
+        self.exclude = exclude
+        self.invert = invert
+        # to avoid confusion, medfilt is the imported method, mfilt is the respective analysis flag
+        self.mfilt = mfilt
+        self.shrink = shrink
+        # NOTE changes made by trim_min/trim_max are saved directly to self.plate
+        self.trim_min = trim_min
+        self.trim_max = trim_max
+
+        # NOTE to prevent carry-over from previous run (e.g. in JSON) reset the bad fit list
+        self.bad_fit = []
+
+        # TESTING setting layout and dCp moved to a separate method
+        self.SetLayout(layout=layout, layout_input_type=layout_input_type, dCp=dCp)
+
+    def SetLayout(
+        self,
+        layout=None,
+        layout_input_type="csv",
+        dCp=analysis_defaults["dCp"],
+    ):
+        """Sets layout (read with pd.read_csv for "csv", stored as given for "from_xlsx") and dCp.
+
+        Raises ValueError when dCp is negative and no layout is available.
+        """
+        if layout is not None:
+            if layout_input_type == "csv":
+                try:
+                    # the format for layout is more strict: it is always a csv with commas as separators
+                    # and index column called ID; more restrictions will follow
+                    # TODO add check for the size of layout DataFrame - should be always 96
+                    # NOTE it may be better to use ";" as csv separator, because it would be easier to write stuff
+                    self.layout = pd.read_csv(layout, index_col="ID", encoding="utf_8")
+                except Exception:
+                    self.print_message(
+                        "Unsupported layout format! No layout info will be available",
+                        "w",
+                    )
+                    self.layout = None
+            elif layout_input_type == "from_xlsx":
+                self.layout = layout
+            # Read blanks and skipped samples from the layout information
+            # (not possible when the CSV could not be parsed and self.layout is None)
+            if self.layout is not None:
+                condition = self.layout["Condition"].astype(str)
+                self.blanks = list(self.layout[condition == "Blank"].index)
+                self.exclude = list(self.layout[condition == "Ignore"].index)
+        else:
+            # in all other cases set the instance attribute to None
+            self.layout = None
+
+        # heat capacity change for the whole plate: self.dCp is either "from_layout" or the value specified by the user
+        # TODO overwrite layout value instead
+        if dCp >= 0:
+            # user-set dCp from CLI overrides dCp supplied in the layout
+            self.print_message(f"dCp for all samples is set to {dCp}", "i")
+            self.dCp = dCp
+        elif self.layout is not None:
+            # if there is a layout, there may or may not be dCp values
+            self.print_message(
+                "Using per-sample dCp values as provided in the layout (invalid values will be turned to 0)",
+                "i",
+            )
+            # ensure that dCp values are numbers; an invalid value is set to the default value of 0
+            # (the result is assigned back: a chained inplace fillna may act on a copy of the column)
+            self.layout["dCp"] = pd.to_numeric(self.layout["dCp"], errors="coerce").fillna(0)
+            # also negative values must be turned to 0 (mask from self.layout: the layout argument may be a path)
+            self.layout.loc[self.layout["dCp"] < 0, ["dCp"]] = 0
+            self.dCp = "from_layout"
+        else:
+            msg = f"Incorrect value for dCp ({dCp})!"
+            raise ValueError(msg)
+
+    def SetFixedParameters(self, fixed_params):
+        """Store a dataframe of fixed fit parameters in self.fixed_params.
+
+        The dataframe is expected to have alphanumeric columns (A1-H12) and the
+        names of the parameters (e.g. Tf_fit, Ea_fit) as index.
+
+        NOTE no sanity checks of the input are currently done
+        """
+        self.fixed_params = fixed_params
+
+    def PrepareData(self):
+        """Prepares input data for processing.
+
+        A copy of self.plate_raw is cleaned up (all-NaN and excluded wells are dropped), then optionally trimmed,
+        inverted, blank-subtracted, median-filtered and binned; the result is stored in self.plate and its first
+        derivative in self.plate_derivative. self.dT is the average temperature step (the shrink step after binning).
+
+        Raises:
+        ------
+        ValueError
+            if the median filter window exceeds the dataset length or the shrink step is not bigger than self.dT
+        """
+        # copy raw data to the main plate
+        # NOTE this is primarily needed for json i/o, to ensure that the analysis runs
+        # are the same after save/load cycle
+        # also the dT step size must be reset
+        self.plate = self.plate_raw.copy()
+        self.dT = (max(self.plate.index) - min(self.plate.index)) / (
+            len(self.plate.index) - 1
+        )
+
+        # Remove all-Nan columns
+        self.plate = self.plate.dropna(how="all", axis=1)
+
+        # remove the user-specified unneeded wells
+        if self.exclude:
+            self.plate = self.plate.drop(
+                self.plate.columns.intersection(self.exclude),
+                axis=1,
+            )
+
+        # trim data from the beginning or end
+        if self.trim_min:
+            self.plate = self.plate[
+                self.plate.index >= min(self.plate.index) + self.trim_min
+            ]
+        if self.trim_max:
+            self.plate = self.plate[
+                self.plate.index <= max(self.plate.index) - self.trim_max
+            ]
+
+        # invert the curves
+        if self.invert:
+            # reflect the curve relative to the x-axis
+            self.plate = self.plate * -1
+            # normalize
+            self.plate = self.plate.apply(normalize, from_input=True)
+
+        # if blank wells were specified, average them and subtract from the remaining data
+        if self.blanks:
+            self.print_message("Subtracting background...", "i")
+            try:
+                bg = self.plate[self.blanks].mean(axis=1)
+                # remove (drop) buffer-only columns from the dataset
+                self.plate = self.plate.drop(
+                    self.plate.columns.intersection(self.blanks),
+                    axis=1,
+                )
+                # subtract the background
+                self.plate = self.plate.sub(bg, axis=0)
+            except KeyError:
+                self.print_message("Same well was supplied as Blank and Ignore", "f")
+                print(
+                    "Please check parameters for --blank --exclude and the layout file",
+                )
+
+        # apply median filter
+        if self.mfilt:
+            # convert the temperature range to an odd integer window size
+            # NOTE the converted value is kept in a local variable: self.mfilt stays in temperature
+            # units, so that running PrepareData() again does not convert an already converted value
+            kernel_size = to_odd(self.mfilt, self.dT)
+
+            # check if the window is bigger than the whole dataset
+            if len(self.plate.index) < kernel_size:
+                # NOTE the output may be confusing to the user, because user gives degrees, but gets datapoints
+                msg = f"Specified medfilt window size ({kernel_size} datapoints) is bigger than the dataset length ({len(self.plate.index)} datapoints)!"
+                raise ValueError(msg)
+
+            self.plate = self.plate.apply(medfilt, kernel_size=kernel_size)
+
+        # NOTE this must be done after median filtering (spikes are bad for averaging)
+        if self.shrink is not None:
+            if self.shrink > self.dT:
+                # create the range for Temperature binning (will be also used as the index)
+                bin_range = np.arange(
+                    np.floor(min(self.plate.index)),
+                    np.ceil(max(self.plate.index)) + self.shrink,
+                    self.shrink,
+                )
+                # temporary DataFrame for binning
+                self.plate_binned = pd.DataFrame(
+                    index=bin_range[:-1],
+                    columns=self.plate.columns,
+                    dtype="float64",
+                )
+
+                # average values in the open interval (bin start, bin start + step); NOTE(review): datapoints exactly on a bin edge are left out - confirm this is intended
+                for i in self.plate_binned.index:
+                    self.plate_binned.loc[i, :] = self.plate[
+                        (self.plate.index > i) & (self.plate.index < i + self.shrink)
+                    ].mean()
+
+                self.plate = self.plate_binned
+                # NOTE bins without datapoints stay in the plate as all-NaN rows (nothing is removed here)
+                self.print_message(
+                    f"Input data binned down to {self.shrink} degree step",
+                    "i",
+                )
+                # required for proper derivative computation
+                self.dT = self.shrink
+            else:
+                msg = f"Requested shrinking step ({self.shrink} degrees) is less than the average temperature step ({self.dT} degrees)"
+                raise ValueError(msg)
+
+        # compute the derivative
+        # NOTE the best way to do it for any type of data - savgol filtering
+        # which is not present in older scipy versions. In this case use a less correct approach
+        # NOTE it is not smart to save converted values of savgol to self.savgol, rather use internal window_length var
+        try:
+            from scipy.signal import savgol_filter
+
+            # convert window size in temperature units to an odd integer
+            window_length = to_odd(self.savgol, self.dT)
+            # check if the window is bigger than the whole dataset
+            if len(self.plate.index) < window_length:
+                # NOTE the output may be confusing to the user, because user gives degrees, but gets datapoints
+                msg = f"Specified savgol window size ({window_length} datapoints) is bigger than the dataset length ({len(self.plate.index)} datapoints)!"
+                raise ValueError(msg)
+
+            # NOTE an additional check for savgol requirements (4 is polyorder used by default)
+            if window_length < 4:
+                msg = f"Specified savgol window size ({window_length} datapoints) is smaller than the polynomial order of the filter (4); increase savgol window size, or do not shrink the data"
+                raise ValueError(msg)
+            # NOTE default mode for savgol is interp, which just uses polynomial fit of the last window to
+            # create the data values beyond the end of the dataset. This may create tails in the derivative
+            # that interfere with peak detection. "nearest" mode just takes the first value and uses it
+            # for the missing parts of the window at data edges; see here for other modes:
+            # https://docs.scipy.org/doc/scipy-0.16.1/reference/generated/scipy.signal.savgol_filter.html
+            self.plate_derivative = self.plate.apply(
+                savgol_filter,
+                window_length=window_length,
+                polyorder=4,
+                deriv=1,
+                mode="nearest",
+            )
+        except Exception as err:
+            # NOTE usually, this exception would happen with old scipy version
+            # However, this also happened in pyinstaller bundle for Windows:
+            # LinAlgError: SVD did not converge in Linear Least Squares
+            # The window-size ValueErrors raised above also end up here; the derivate is not crucial (fits
+            # usually converge even with a random Tm guess), so proceed and report the reason in the warning
+            self.print_message(
+                f"Cannot compute a smoothened derivative ({err}), fit results can be suboptimal",
+                "w",
+            )
+            self.print_message("Falling back to the difference method", "i")
+            self.plate_derivative = (self.plate - self.plate.shift(periods=1)) / self.dT
+
+    def ProcessData(self):
+        """Performs curve fitting and creates results dataframes.
+
+        Notes:
+        -----
+        The names for the parameters and the contents of plate_results variable are different for various methods, so they are set up based on the selected analysis
+        """
+        if self.model == "skip":
+            self.print_message("Dataset was omitted from analysis", "i")
+            # check if there are plate_results attributes from previous analysis and delete them
+            # NOTE there are more leftover attributes which will be saved to JSON, but without plate_results the dataset should not be recognized as a processed one
+            if self.analysisHasBeenDone():
+                del self.plate_results
+                del self.plate_results_stdev
+            # return statement allows to silently terminate the ProcessData method
+            return
+
+        # check if model name is correct
+        if self.model not in avail_models.keys():
+            msg = f"Unknown model '{self.model}'"
+            raise ValueError(msg)
+
+        model = avail_models[self.model](scan_rate=self.scan_rate)
+
+        self.print_message("Processing data...", "i")
+
+        # Set here scan rate for the model
+        # None - not specified (e.g. for CSV), doesn't matter for equilibrium and empirical models; can be set in CLI
+        # XLSX - parsed from Excel and always available
+        # kinetic models should raise a ValueError
+        model.scan_rate = self.scan_rate
+
+        # the function to do curve fitting
+        f = model.fun
+        # generate parameter names
+        result_index = []
+        for i in model.param_names():
+            result_index.append(i + "_init")
+        for i in model.param_names():
+            result_index.append(i + "_fit")
+        result_index.append("S")
+
+        # select dataframe to be fit (e.g. can be changed to plate_derivative)
+        df_for_fitting = self.plate
+        # create dataframe to store results and populate with initial parameter values
+        # NOTE by default empty dataframes get "object" data type, which can cause problems when using numpy
+        # if the datatype is specified explicitly, the problem should not exist
+        self.plate_results = pd.DataFrame(
+            index=result_index,
+            columns=self.plate.columns.values,
+            dtype="float64",
+        )
+
+        # create a dataframe to store stdev for fit parameters
+        # for simplicity the dataframe will also have initial parameters included
+        # they will be dropped in the end of processing
+        self.plate_results_stdev = pd.DataFrame(
+            index=result_index,
+            columns=self.plate.columns.values,
+            dtype="float64",
+        )
+
+        # an empty p0 variable
+        p0 = []
+        # run a cycle through all columns and calculate fits
+        self.print_message("Fitting curves...", "i")
+
+        for i in df_for_fitting.columns.values:
+            # drop Nan values to prevent crashes of fitting
+            data = df_for_fitting[i].dropna()
+            T = np.float64(data.index.values)
+            # guess initial parameters
+            p0 = model.param_init(data)
+            self.plate_results[i][0 : len(p0)] = p0
+            # get parameter bounds
+            param_bounds = model.param_bounds(data)
+            # if the model has these parameters, then run a more precise initial parameter estimation
+            if {"kN", "bN", "kU", "bU"}.issubset(model.param_names()):
+                # furthermore, if there is a Tm than run smart Tm pre-estimation
+                estimate_Tm = "Tm" in model.param_names()
+                self._estimate_baseline(
+                    data,
+                    fit_length=self.baseline_fit,
+                    estimate_Tm=estimate_Tm,
+                )
+
+                # Set bounds for kN, bN, kU, bU to be a multiple of stdev of baseline-prefitting
+                # NOTE this assumes that the parameters are the first four in the list of bounds/p0
+                # originally param_bounds is a tuple, so it has to be converted to a list...
+                if self.baseline_bounds > 0:
+                    param_bounds = list(param_bounds)
+                    param_bounds[0] = list(param_bounds[0])
+                    param_bounds[1] = list(param_bounds[1])
+                    param_bounds[0][:4] = list(
+                        self.plate_results[i][:4]
+                        - self.plate_results_stdev[i][:4] * self.baseline_bounds,
+                    )
+                    param_bounds[1][:4] = list(
+                        self.plate_results[i][:4]
+                        + self.plate_results_stdev[i][:4] * self.baseline_bounds,
+                    )
+                elif self.baseline_bounds < 0:
+                    msg = f"Expected a non-negative int for baseline_bounds, but got {self.baseline_bounds}"
+                    raise ValueError(
+                        msg,
+                    )
+
+            # if this MoltenProtFit instance has any fixed parameter dataframe,
+            # they will be supplied to the model
+            if self.fixed_params is not None:
+                model.set_fixed(list(self.fixed_params[i]))
+            try:
+                # for some reason, pandas Series for initial parameters have to be converted to a list
+                # the fit gives two arrays: fit parameters (p) and covariance matrix (covm for stdev estimation)
+
+                # depending on scipy version enforce the parameter limits or not
+                if (LooseVersion(scipy_version) >= LooseVersion("0.17")) and (
+                    param_bounds is not None
+                ):
+                    # NOTE adding ftol=0.01 and xtol=0.01 may in some cases speed up the fitting (loosens the convergence criteria)
+                    # default values 1e-8 are a bit too conservative; in preliminary tests the speedup was marginal
+                    p, covm = curve_fit(
+                        f,
+                        T,
+                        data,
+                        list(self.plate_results[i][0 : len(p0)]),
+                        bounds=param_bounds,
+                    )
+                else:
+                    p, covm = curve_fit(
+                        f,
+                        T,
+                        data.values,
+                        list(self.plate_results[i][0 : len(p0) + 1]),
+                    )
+                self.plate_results[i][len(p0) : (len(p0)) * 2] = p
+                # this is the official way to compute stdev error (from scipy docs)
+                self.plate_results_stdev[i][len(p0) : (len(p0)) * 2] = np.sqrt(
+                    np.diagonal(covm),
+                )
+            except RuntimeError:
+                # these probably correspond to bad fitting
+                self.print_message(
+                    f"Curve fit for {i} failed, probably invalid transition.",
+                    "w",
+                )
+                # generate a list of bad fits
+                self.bad_fit.append(i)
+            except ValueError as e:
+                self.print_message(e.__str__(), "w")
+                # catch some problems of fitting with santoro1988d
+                self.print_message(
+                    f"Curve fit for {i} failed unexpectedly (ValueError)",
+                    "w",
+                )
+                # generate a list of bad fits
+                self.bad_fit.append(i)
+
+            # supply sqrt(n) for S calculation
+            # to calculate RMSE we don't care about the amount of parameters,
+            # however, they must be included for standrard error of estimate
+            # (more info here: http://people.duke.edu/~rnau/compare.htm)
+            try:
+                self.plate_results[i].loc["S"] = np.sqrt(len(T) - len(p0))
+            except TypeError:
+                self.print_message(
+                    f"Curve for sample {i} has just one value!",
+                    "w",
+                )
+                # add to bad samples list
+                self.bad_fit.append(i)
+
+        # drop the wells that could not be fit
+        self.plate_results.drop(self.bad_fit, axis=1, inplace=True)
+        self.plate_results_stdev.drop(self.bad_fit, axis=1, inplace=True)
+
+        # empty DataFrame with temperature index to store computed fitted curves
+        self.plate_fit = pd.DataFrame(index=self.plate.index)
+
+        def calculate_fit(input_series):
+            """Helper function to compute fit curves
+            input DataFrame is self.plate_results.
+            """
+            # use the fit parameters from plate_results and the index of plate_fit to compute fit curves
+            self.plate_fit = pd.concat(
+                [
+                    self.plate_fit,
+                    pd.Series(
+                        f(
+                            self.plate_fit.index,
+                            *list(input_series[len(p0) : (len(p0)) * 2]),
+                        ),
+                        index=self.plate_fit.index,
+                        name=input_series.name,
+                    ),
+                ],
+                axis=1,
+            )
+
+        # apply the helper function
+        self.plate_results.apply(calculate_fit)
+
+        # calculate S and put it into plate_results
+        self.print_message("Estimating S...\n", "i")
+
+        # compute S using df_for_fitting
+        plate_S = (df_for_fitting - self.plate_fit) ** 2
+        plate_S = np.sqrt(plate_S.sum())
+        self.plate_results.loc["S", :] = plate_S / self.plate_results.loc["S", :]
+
+        # transpose the DataFrame to have sample wells in rows
+        self.plate_results = self.plate_results.T
+        self.plate_results_stdev = self.plate_results_stdev.T
+
+        # compute BS-factor to assess how wide is the signal window relative to the noise
+        # this requires knowledge of baseline parameters and Tm
+        if {"kN", "bN", "kU", "bU", "Tm"}.issubset(model.param_names()):
+            # Use S as an overall proxy for assay noise
+            self.plate_results["BS_factor"] = 1 - 6 * self.plate_results.S / abs(
+                self.plate_results.kU_fit * self.plate_results.Tm_fit
+                + self.plate_results.bU_fit
+                - (
+                    self.plate_results.kN_fit * self.plate_results.Tm_fit
+                    + self.plate_results.bN_fit
+                ),
+            )
+
+        # compute baseline-corrected raw data if standard baseline parameters are available
+        if {"kN", "bN", "kU", "bU"}.issubset(model.param_names()):
+            self._calculate_raw_corr()
+
+        # add layout information to the results
+        if self.layout is not None:
+            self.plate_results = pd.concat(
+                [self.layout, self.plate_results],
+                join="inner",
+                axis=1,
+            )
+
+        # remove *_init columns and S from self.plate_results_stdev
+        self.plate_results_stdev.drop(
+            labels=result_index[: len(p0)],
+            axis=1,
+            inplace=True,
+        )
+        self.plate_results_stdev.drop(labels="S", axis=1, inplace=True)
+
+        # based on the model, calculate additional curve characteristics and set the vertical lines for plots
+        if model.sortby == "dG_std":
+            # Calculate T_onset
+            self._calc_Tons("Tm_fit", "dHm_fit", self.onset_threshold)
+            # dG and dCp component
+            self.CalculateThermodynamic()
+            # list of vertical lines to be plotted
+            self.plotlines = ["T_onset", "Tm_fit"]
+        elif model.sortby == "dG_comb_std":
+            # Calculate T_onset
+            self._calc_Tons("T1_fit", "dHm1_fit", self.onset_threshold)
+            # Calculate T2 from dT2_1_fit
+            # TODO error propagation
+            self.plate_results["T2_fit"] = (
+                self.plate_results["T1_fit"] + self.plate_results["dT2_1_fit"]
+            )
+            # NOTE dCp is hard to determine for the intermediate, so it is completely neglected
+            # dG is calculated for each reaction (N<->I and I<->U) and then combined following the principle of thermodynamic coupling
+            self.plate_results["dG_comb_std"] = self.plate_results["dHm1_fit"] * (
+                1 - T_std / self.plate_results["T1_fit"]
+            ) + self.plate_results["dHm2_fit"] * (
+                1 - T_std / self.plate_results["T2_fit"]
+            )
+            # list of vertical lines to be plotted
+            self.plotlines = ["T_onset", "T1_fit", "T2_fit"]
+        elif model.sortby == "T_eucl":
+            # computes Euclidean distance for Tm/T_onset
+            # Tm and T_ons are on the same scale and are orthogonal characteristics of the sigmoidal curve
+            # thus, a sample with the most optimal combination is the one that is most far away from T=0
+            self.plate_results["T_eucl"] = np.sqrt(
+                self.plate_results["Tm_fit"] ** 2
+                + self.plate_results["T_onset_fit"] ** 2,
+            )
+            # list of vertical lines to be plotted
+            self.plotlines = ["T_onset_fit", "Tm_fit"]
+        elif model.sortby == "T_eucl_comb":
+            self.plate_results["T_eucl_comb"] = np.sqrt(
+                self.plate_results["T1_fit"] ** 2
+                + self.plate_results["T_onset1_fit"] ** 2,
+            ) + np.sqrt(
+                self.plate_results["T2_fit"] ** 2
+                + self.plate_results["T_onset2_fit"] ** 2,
+            )
+            self.plotlines = [
+                "T_onset1_fit",
+                "T1_fit",
+                "T_onset2_fit",
+                "T2_fit",
+            ]
+        elif model.sortby == "pk_ratio_std":
+            # the kF/kR at std temperature; take as -log10 to have higher values for higher stability
+            self.plate_results["pk_ratio_std"] = -np.log10(
+                model.arrhenius(
+                    T_std,
+                    self.plate_results.TfF_fit,
+                    self.plate_results.EaF_fit,
+                )
+                / model.arrhenius(
+                    T_std,
+                    self.plate_results.TfR_fit,
+                    self.plate_results.EaR_fit,
+                ),
+            )
+            self.plotlines = ["TfF_fit", "TfR_fit"]
+        elif model.sortby == "pk_std":
+            # For irreversible reactions calculate the unfolding rate constant at std temperature
+            # based on Tf and Ea values
+            # NOTE by convention higher sorting value indicates higher stability, but it is not the case
+            # for rate constant of reaction N -> U; a common trick in chemistry is to calculate the
+            # negative log10
+            self.plate_results["pk_std"] = -np.log10(
+                model.arrhenius(
+                    T_std,
+                    self.plate_results.Tf_fit,
+                    self.plate_results.Ea_fit,
+                ),
+            )
+            self.plotlines = ["Tf_fit"]
+        else:
+            self.print_message(
+                "Model {} contains no measure for final sorting".format(
+                    model.short_name,
+                ),
+                "w",
+            )
+            self.plotlines = []
+
+        # sort_values based on the model's sortby property
+        if model.sortby is not None:
+            self.plate_results.sort_values(
+                by=model.sortby,
+                inplace=True,
+                ascending=False,
+            )
+
+        # for proper json i/o
+        self.plate_results.index.name = "ID"
+        self.plate_results_stdev.index.name = "ID"
+
+    def CalculateThermodynamic(self):
+        """Compute dG_std and dCp_component (with stdev) and append them to the result tables.
+        > dG_std - Gibbs free energy of unfolding extrapolated to T_std from dHm_fit, Tm_fit and dCp
+        > dCp - heat capacity change of unfolding, read from self.dCp or from the layout column "dCp"
+        > dCp_component - the temperature-only factor that multiplies dCp in the dG_std equation.
+        """
+        # temporary dataframe for the computed values, indexed like plate_results
+        column_names = ["dCp", "dG_std"]
+        # NOTE by default empty dataframes get "object" data type, which can cause problems when using numpy
+        # if the datatype is specified explicitly, the problem should not exist
+        # NOTE since dCp is set externally, it will be just copy-pasted and no stdev can be computed
+        td_results = pd.DataFrame(
+            index=self.plate_results.index,
+            columns=column_names,
+            dtype="float64",
+        )
+        # separate dataframe for stdev values; it starts with the dG_std column only
+        td_results_stdev = pd.DataFrame(
+            index=self.plate_results.index,
+            columns=[column_names[1]],
+            dtype="float64",
+        )
+
+        if self.dCp == "from_layout":
+            td_results["dCp"] = self.layout["dCp"]
+        else:
+            td_results["dCp"] = self.dCp
+
+        # issue a warning about inaccuracy of extrapolated dG with dCp=0
+        if any(td_results["dCp"] == 0):
+            self.print_message(
+                "One or more dCp values are set to 0; this results in an overestimate of dG_std",
+                "w",
+            )
+
+        # dG_std is extrapolated to standard temperature using the model described in Becktel and Schellman, 1987
+        # Tm is chosen as the reference temperature for equation (4), which also means that dS(Tm) = dH(Tm)/Tm
+        td_results["dG_std"] = self.plate_results["dHm_fit"] * (
+            1 - T_std / self.plate_results["Tm_fit"]
+        ) - td_results["dCp"] * (
+            self.plate_results["Tm_fit"]
+            - T_std
+            + T_std * np.log(T_std / self.plate_results["Tm_fit"])
+        )
+        td_results_stdev["dG_std"] = np.sqrt(
+            self.plate_results_stdev["dHm_fit"] ** 2
+            + (self.plate_results["dHm_fit"] * T_std / self.plate_results["Tm_fit"])
+            ** 2
+            * (
+                (self.plate_results_stdev["dHm_fit"] / self.plate_results["dHm_fit"])
+                ** 2
+                + (self.plate_results_stdev["Tm_fit"] / self.plate_results["Tm_fit"])
+                ** 2
+            )
+            + (td_results["dCp"] * T_std) ** 2
+            * (
+                (self.plate_results_stdev["Tm_fit"] / T_std) ** 2
+                + (self.plate_results_stdev["Tm_fit"] / self.plate_results["Tm_fit"])
+                ** 2
+            ),
+        )
+
+        # temperature-dependent factor of the dCp term: dG_std = dHm term + dCp * dCp_component
+        td_results["dCp_component"] = (
+            T_std
+            - self.plate_results["Tm_fit"]
+            - T_std * np.log(T_std / self.plate_results["Tm_fit"])
+        )
+        td_results_stdev["dCp_component"] = np.sqrt(
+            (self.plate_results_stdev["Tm_fit"]) ** 2
+            + (
+                T_std
+                * self.plate_results_stdev["Tm_fit"]
+                / self.plate_results["Tm_fit"]
+            )
+            ** 2,
+        )
+
+        # append dCp_component and dG_std (but not dCp itself) to plate_results, and their stdev to plate_results_stdev
+        self.plate_results = pd.concat(
+            [self.plate_results, td_results.loc[:, ["dCp_component", "dG_std"]]],
+            axis=1,
+            sort=True,
+        )
+
+        self.plate_results_stdev = pd.concat(
+            [self.plate_results_stdev, td_results_stdev],
+            axis=1,
+            sort=True,
+        )
+
+    def CombineResults(self, tm_stdev_filt, bs_filt, merge_dup, tm_key):
+        """A helper method to filter results and optionally average duplicates.
+
+        Parameters
+        ----------
+        tm_stdev_filt
+            samples with stdev for Tm above this value will be discarded (values <= 0 disable it)
+        bs_filt
+            samples with BS-factor at or above this value will be kept (values <= 0 disable it)
+        merge_dup
+            whether to merge duplicates (based on annotations in the layout)
+        tm_key
+            the name for column with Tm
+
+        Returns:
+        -------
+        results - joined plate_results/stdev DataFrame with optional filtering and duplicate averaging
+
+        Notes:
+        -----
+        * not all plate_results can contain BS-factor, so this part of filtering will be skipped
+        * without merge_dup only the samples that passed the filters get their fit stdev attached
+        """
+        # NOTE to prevent irreversible changes to plate_results* attributes, force a copy
+        results = self.plate_results.copy()
+
+        # we may have 2 types of stdev, one from fitting and another one from duplicate averaging
+        stdev_fit = self.plate_results_stdev.copy()
+
+        # add suffix _stdev to prevent duplicate column names in joined df's
+        stdev_fit.columns = stdev_fit.columns + "_stdev"
+
+        # NOTE previously, merge_dup was run _before_ stdev filtering, which mixed two
+        # processes (removal of bad fits and removal of invalid dups) in one.
+        # Now bad fits are filtered out first, then dups are merged and the stdev that
+        # comes from them is used; this also obsoletes the methods for stdev "joining"
+
+        # use stdev_fit dataframe to get ID's of samples that have improper values
+        if tm_stdev_filt > 0:
+            drop_tm_stdev = stdev_fit[tm_key + "_fit_stdev"][
+                stdev_fit[tm_key + "_fit_stdev"] > tm_stdev_filt
+            ].index
+            results = results.drop(drop_tm_stdev, axis=0)
+        if bs_filt > 0:
+            try:
+                results = results[results["BS_factor"] >= bs_filt]
+            except KeyError:
+                self.print_message(
+                    "Column 'BS_factor' not found in the results DataFrame, cannot filter",
+                    "w",
+                )
+
+        if merge_dup:
+            # TODO add an option for discarding samples that do not have a duplicate
+            # averaging capillary numbers doesn't make sense, convert them to strings
+            if "Capillary" in results.columns:
+                results.Capillary = results.Capillary.apply(str)
+
+            # processing steps: remove duplicates with different aggregation functions:
+            # - text data (ID, Capillary) - create a comma-separated string of values
+            # - result data - compute mean, stdev and n (amount of duplicated values)
+
+            # prepare DataFrames:
+            # split result data to text and numeric
+            text_data = results.select_dtypes(include=["object"])
+            numeric_data = results.select_dtypes(include=["float64", "int64"])
+
+            # for text data convert ID index to a new column (a bit hacky)
+            text_data.reset_index(inplace=True)
+            text_data.set_index("ID", inplace=True, drop=False)
+
+            # since the column for de-duplication is text, it has to be re-created for numeric_data and stdev_data
+            numeric_data = pd.concat([numeric_data, text_data["Condition"]], axis=1)
+
+            # perform deduplication (grouping)
+
+            # for text data we declare a lambda function
+            text_data = text_data.groupby("Condition").agg(lambda x: ",".join(list(x)))
+
+            # results data - count of each sample
+            numeric_data_count = numeric_data.groupby("Condition").agg("count")
+            # now also stdev; the "std" alias pins the sample stdev (ddof=1) that np.std used to be mapped to
+            numeric_data_stdev = numeric_data.groupby("Condition").agg("std")
+            numeric_data_stdev.columns = numeric_data_stdev.columns + "_stdev"
+            # and finally average (string alias instead of the deprecated np.mean callable)
+            numeric_data = numeric_data.groupby("Condition").agg("mean")
+
+            # add n information to the text_data
+            # BUG if after filtering the dataset is empty, this operation produces an error
+            # this is caught with try/except
+            try:
+                text_data["n"] = numeric_data_count.iloc[:, [0]]
+            except ValueError as e:
+                self.print_message(e.__str__(), "w")
+                self.print_message(
+                    "No data left after filtering in CombineResults()",
+                    "w",
+                )
+
+            # update the results dataframe
+            results = pd.concat([text_data, numeric_data, numeric_data_stdev], axis=1)
+        else:
+            # align stdev to the kept rows, otherwise the outer concat re-adds filtered samples as NaN rows
+            results = pd.concat([results, stdev_fit.reindex(results.index)], axis=1)
+        return results
+
+    def RenameResults(self, datatype="sct"):
+        """Rename Tm to Tagg in both result tables for scattering data ("sct") (HACK).
+        "chem" (planned: Tm -> dGH2O) is not implemented; other datatypes raise ValueError.
+        """
+        if datatype == "chem":
+            msg = "Chemical denaturation renaming is not there yet"
+            raise NotImplementedError(msg)
+        if datatype != "sct":
+            msg = f"Unknown datatype for renaming results: {datatype!r}"
+            raise ValueError(msg)
+        rename_dict = {"Tm_init": "Tagg_init", "Tm_fit": "Tagg_fit"}
+        self.plate_results.rename(columns=rename_dict, inplace=True)
+        self.plate_results_stdev.rename(columns=rename_dict, inplace=True)
+
+    @staticmethod
+    def _trim_string(string, length=30, symmetric=True):
+        """Helper method to shorten a string to ``length`` kept characters plus a dots marker.
+
+        Arguments:
+        ---------
+        string
+            value to be trimmed (converted with str() first)
+        length
+            number of original characters to keep; shorter inputs are returned unchanged
+        symmetric
+            if true, keep both ends and cut out the middle, otherwise keep only the start
+        """
+        text = str(string)
+        if len(text) <= length:
+            return text
+        if not symmetric:
+            return text[:length] + "..."
+        head = length // 2
+        # the tail gets the rest; slice from an explicit start because text[-0:] is the whole string
+        tail = length - head
+        return text[:head] + " ... " + text[len(text) - tail :]
+
+    def _plotfig_pdf(self, samples, failed=False):
+        """A helper method for packing individual sample plots into report pages.
+
+        Parameters
+        ----------
+        samples
+            a valid list of samples to be plotted
+        failed
+            indicate if samples are from failed fits
+
+        Returns:
+        -------
+        A list of Figure objects to be added to pages list (empty if there are no samples)
+
+        Notes:
+        -----
+        * every page is a 4x3 grid: at most 11 sample plots plus one cell for the legend
+        * the first page carries the plain title, the following ones add " (continued)"
+        * a figure is created only when there are samples to put on it
+        """
+        if failed:
+            suptitle = "Excluded/failed samples"
+            datatype = "raw"
+        else:
+            suptitle = "Successful fits"
+            datatype = "overview"
+        # a 4x3 grid has 12 cells and one of them is reserved for the legend
+        plots_per_page = 11
+        # materialize the input (e.g. a pandas Index) so that it can be sliced by position
+        samples = list(samples)
+        pages = []
+
+        # walk through the samples in page-sized chunks; sizing each page from its own chunk
+        # (instead of comparing a per-page counter with the total) keeps the last partial page
+        for start in range(0, len(samples), plots_per_page):
+            page_samples = samples[start : start + plots_per_page]
+            plot_fig, plot_axs = plt.subplots(
+                4,
+                3,
+                sharex=False,
+                sharey=False,
+                figsize=(8.3, 11.7),
+            )
+            # only the first page carries the plain title
+            plot_fig.suptitle(
+                suptitle if start == 0 else suptitle + " (continued)",
+                fontweight="bold",
+                fontsize="x-large",
+            )
+            plot_axs = list(plot_axs.flat)
+            # one sample per cell, in the order given by the flat iterator
+            for plot_ax, sample in zip(plot_axs, page_samples):
+                self.plotfig(
+                    "dummy_output",
+                    sample,
+                    datatype=datatype,
+                    save=False,
+                    show=False,
+                    data_ax=plot_ax,
+                    vline_legend=True,
+                )
+            n_plotted = len(page_samples)
+            # make legend in the next axes, using the lines of the last plotted sample
+            plot_axs[n_plotted].legend(
+                handles=plot_axs[n_plotted - 1].get_lines(),
+                mode="expand",
+                ncol=1,
+            )
+            # switch off the axis of the legend cell and of all remaining unused cells
+            for unused_ax in plot_axs[n_plotted:]:
+                unused_ax.set_axis_off()
+            plot_fig.tight_layout(
+                rect=(0.02, 0.05, 0.98, 0.95),
+            )  # rect leaves some margins empty
+            pages.append(plot_fig)
+
+        return pages
+
+    def PdfReport(self, outfile):
+        """Generate and write a multi-page PDF report.
+
+        Parameters
+        ----------
+        outfile
+            location of the output, overwrite without confirmation
+
+        Notes:
+        -----
+        * heatmap and converter96 are ancient methods, so they are just carefully wrapped around
+        * each page is a figure object; all open matplotlib figures are closed at the end
+        * multi-page pdf as per mpl [docs](https://matplotlib.org/stable/gallery/misc/multipage_pdf.html)
+        """
+        from matplotlib.backends.backend_pdf import PdfPages
+        from matplotlib.table import table as mpl_table
+
+        result_columns = (
+            self.getResultsColumns()
+        )  # the first value should be the recommended sorting parameter followed by vlines and BS/S
+        # add condition column
+        result_columns = ["Condition", *result_columns]
+        sort_parameter = result_columns[1]
+        # preprocess the result df
+        result_table = self.plate_results.loc[
+            :,
+            result_columns,
+        ].copy()  # to prevent edits to the original DF
+        result_table = np.round(result_table, 2)  # round the numeric data
+        result_table["Condition"] = result_table["Condition"].apply(
+            self._trim_string,
+            length=10,
+            symmetric=False,
+        )
+        # per-cell colors as grayscale strings (1.0 is white, 0 is black); built as a numpy
+        # array so that no strings have to be written into the numeric columns of a DataFrame
+        result_table_colors = np.full(result_table.shape, "1.0", dtype=object)
+        result_table_colors[::2] = "0.75"  # shade every other row, starting from the first
+        result_index = result_table.index
+        result_table = result_table.values  # convert to a 2D numpy array
+        n_results = len(result_table)  # total number of results
+        pages = []
+
+        ## Page 1: Heatmap of the respective sortby parameter, top 15 results, run info
+        plate96 = self.converter96(sort_parameter, reference=None)
+        page1, page1_ax = self.heatmap(
+            "dummy_output",
+            plate96,
+            sort_parameter,
+            save=False,
+            pdf_report=True,
+        )
+        page1_ax[0].set_title(
+            f"Heatmap of {sort_parameter}",
+            loc="left",
+            fontweight="bold",
+        )
+        # mpl tables cannot do word wrapping, so trim the file name
+        filename = self._trim_string(self.filename)
+        mp_version = __version__
+        if from_pyinstaller:
+            mp_version += " (PyInstaller bundle)"
+        timestamp = strftime("%c")
+        # failed fits and user-excluded samples
+        excluded = self._get_failed_samples()
+        if len(excluded) > 0:
+            excluded_str = self._trim_string(", ".join(list(excluded)))
+        else:
+            excluded_str = "None"
+
+        info_table = [
+            ["Timestamp", timestamp],
+            ["Input file", filename],
+            ["Scan rate, degrees/min", self.scan_rate],
+            ["MoltenProt version", mp_version],
+            ["Analysis model", self.model],
+            ["Excluded/failed samples", excluded_str],
+        ]
+        info_table_ax = page1_ax[2]
+        info_table = mpl_table(
+            info_table_ax,
+            info_table,
+            loc="upper left",
+            edges="open",
+            cellLoc="left",
+        )
+
+        # using the solution from here to set proper font size in the table:
+        # https://stackoverflow.com/questions/15514005/how-to-change-the-tables-fontsize-with-matplotlib-pyplot
+        info_table.auto_set_font_size(False)
+        info_table_ax.set_title("Run info", loc="left", fontweight="bold")
+        top10_table_ax = page1_ax[1]
+        mpl_table(
+            top10_table_ax,
+            result_table[:15, :],
+            loc="upper left",
+            colLabels=result_columns,
+            cellLoc="left",
+            rowLabels=result_index[:15],
+            cellColours=result_table_colors[:15, :],
+        )
+        top10_table_ax.set_title("Top 15 results", loc="left", fontweight="bold")
+
+        info_table_ax.set_axis_off()
+        top10_table_ax.set_axis_off()
+
+        # finalize page 1
+        page1.suptitle(
+            f"MoltenProt Report: {self.readout_type}",
+            fontweight="bold",
+            fontsize="x-large",
+        )
+        # add citation to the bottom of the page
+        page1.text(0.5, 0.05, citation["short"], ha="center")  # fontstyle='italic',
+        pages.append(page1)
+
+        ## Result table with the first 48 rows - created only if there are more than 15 results
+        if n_results > 15:
+            page2, page2_ax = plt.subplots(1, 1, figsize=(8.3, 11.7))
+            mpl_table(
+                page2_ax,
+                result_table[:48],
+                loc="upper left",
+                colLabels=result_columns,
+                cellLoc="left",
+                rowLabels=result_index[:48],
+                cellColours=result_table_colors[:48],
+            )
+            page2_ax.set_axis_off()
+            page2_ax.set_title(
+                f"Result table (sorted by {sort_parameter})",
+                loc="left",
+                fontweight="bold",
+            )
+            pages.append(page2)
+
+        # if more than 48 samples are present, the remaining rows go to one more page
+        if n_results > 48:
+            page3, page3_ax = plt.subplots(1, 1, figsize=(8.3, 11.7))
+            mpl_table(
+                page3_ax,
+                result_table[48:],
+                loc="upper left",
+                colLabels=result_columns,
+                cellLoc="left",
+                rowLabels=result_index[48:],
+                cellColours=result_table_colors[48:],
+            )
+            page3_ax.set_axis_off()
+            page3_ax.set_title(
+                "Result table (continued)",
+                loc="left",
+                fontweight="bold",
+            )
+            pages.append(page3)
+
+        ## pages with plots of individual curves
+        pages += self._plotfig_pdf(self.plate_results.index)
+        ## same as above, but for failed fits
+        pages += self._plotfig_pdf(excluded, failed=True)
+
+        # write output
+        page_no = 1
+        page_count = len(pages)
+        with PdfPages(outfile) as pdf_file:
+            for i in pages:
+                # add page number and save
+                i.text(
+                    0.5,
+                    0.025,
+                    f"-- Page {page_no} of {page_count} --",
+                    fontstyle="italic",
+                    ha="center",
+                )
+                page_no += 1
+                pdf_file.savefig(i)
+
+        # clean up mpl objects
+        plt.close("all")
+
+    def _get_failed_samples(self):
+        """Return IDs of samples that were either excluded or not fit.
+
+        These are the plate_raw column labels that are absent from the plate_results
+        index; presumably fails if the analysis has not been run yet (unverified).
+        """
+        raw_ids, fitted_ids = self.plate_raw.columns, self.plate_results.index
+        return raw_ids.difference(fitted_ids)
+
+    def WriteOutput(
+        self,
+        print10=False,
+        xlsx=False,
+        genpics=False,
+        heatmaps=[],
+        hm_ref=None,
+        heatmap_cmap=defaults["heatmap_cmap"],
+        resources_prefix="",
+        n_jobs=1,
+        no_data=False,
+        pdf=False,
+    ):
+        """Write the results to the disk.
+
+        Parameters
+        ----------
+        print10
+            print 10 best samples to stdout
+        xlsx
+            write output in XLSX rather than CSV
+        genpics
+            generate figures for all samples
+        heatmaps
+            generate heatmaps for selected plate_results columns
+        hm_ref
+            reference well for heatmap
+        heatmap_cmap
+            matplotlib colormap for heatmap
+        resources_prefix
+            extra string to add when saving files (e.g. to prevent overwrites in multi-dataset case)
+        n_jobs
+            number of subprocesses to run figure plotting in parallel
+        no_data
+            do not output any data
+        pdf
+            write a report in PDF format
+        """
+        # estimate how many samples could not go through the processing by finding the
+        # difference between the indexes of original dataset and the final results
+        # BUG if the sample is registered in xlsx annotations, but not present in the raw data plate
+        # (i.e. it was marked as bad capillary by Prometheus), then it will not be shown here
+        # such samples can be easily diagnosed through reports: even the raw-only signal is not plotted
+        # BUG failed samples have only ID's, but not layout info
+        failed_samples = self._get_failed_samples()
+
+        # if there is a least one empty sample create respective results dataframes for it
+        # and temporarily append them to the main results (will not persist as a class attribute)
+        # NOTE index objects are immutable, so a new empty df has to be created and concatenated
+        if len(failed_samples) > 0:
+            failed_results = pd.DataFrame(
+                index=failed_samples,
+                columns=self.plate_results.columns,
+            )
+            failed_results_stdev = pd.DataFrame(
+                index=failed_samples,
+                columns=self.plate_results_stdev.columns,
+            )
+            output_results = pd.concat([self.plate_results, failed_results])
+            output_results_stdev = pd.concat(
+                [self.plate_results_stdev, failed_results_stdev],
+            )
+        else:
+            output_results = self.plate_results
+            output_results_stdev = self.plate_results_stdev
+
+        output_path = self.resultfolder
+
+        if print10:
+            # print the 10 best conditions to the console
+            self.print_message("\n\nBest 10 conditions (best->worst):", "i")
+            print(", ".join(list(self.plate_results[:10].index)) + "\n")
+
+        # the checks for the folder existence are done in the main script
+        self.print_message("Writing results...", "i")
+        if not no_data:
+            if xlsx:
+                # create a holding object for *.xlsx export
+                writer = pd.ExcelWriter(
+                    os.path.join(output_path, resources_prefix + "_Results.xlsx"),
+                )
+
+                self.plate_raw.to_excel(writer, "Raw data")
+                self.plate.to_excel(writer, "Preprocessed data")
+                self.plate_fit.to_excel(writer, "Fit curves")
+                self.plate_raw_corr.to_excel(writer, "Baseline-corrected")
+                output_results.to_excel(writer, "Fit parameters")
+                output_results_stdev.to_excel(writer, "Standard deviations")
+
+                # this is the longest part in XLSX saving procedure
+                writer.save()
+            else:
+                # convert plate_results* dataframes to *.csv's
+                output_results.to_csv(
+                    os.path.join(output_path, resources_prefix + "_results.csv"),
+                    sep=",",
+                    index_label="Parameters",
+                    encoding="utf-8",
+                )
+                output_results_stdev.to_csv(
+                    os.path.join(output_path, resources_prefix + "_results_stdev.csv"),
+                    sep=",",
+                    index_label="Parameters",
+                    encoding="utf-8",
+                )
+                self.plate_fit.to_csv(
+                    os.path.join(output_path, resources_prefix + "_fit.csv"),
+                    sep=",",
+                    encoding="utf-8",
+                )
+                self.plate.to_csv(
+                    os.path.join(output_path, resources_prefix + "_preproc_curves.csv"),
+                    sep=",",
+                    encoding="utf-8",
+                )
+                self.plate_raw_corr.to_csv(
+                    os.path.join(output_path, resources_prefix + "_raw_corr.csv"),
+                    sep=",",
+                    encoding="utf-8",
+                )
+        # PDF report
+        if pdf:
+            self.PdfReport(os.path.join(output_path, resources_prefix + "_report.pdf"))
+
+        # generate heatmaps
+        if len(heatmaps) > 0:
+            if "all" in heatmaps:
+                # columns shared (presumably) between all models
+                heatmaps = ["S", *self.plotlines]
+            for i in heatmaps:
+                try:
+                    plate96 = self.converter96(i, hm_ref)
+                    # check if the column name is in the hm_dic, if not use default options
+                    try:
+                        self.heatmap(
+                            output_path,
+                            plate96,
+                            i,
+                            lowerbetter=self.hm_dic[i]["lowerbetter"],
+                            title=self.hm_dic[i]["title"],
+                            tick_labels=self.hm_dic[i]["tick_labels"],
+                            heatmap_cmap=heatmap_cmap,
+                        )
+                    except KeyError:
+                        try:
+                            # print that lowerbetter for this heatmap can be wrong
+                            self.print_message(
+                                "{} was not found in hm_dic, the colors may be wrong!".format(
+                                    i,
+                                ),
+                                "w",
+                            )
+                            self.heatmap(
+                                output_path,
+                                plate96,
+                                i,
+                                title="Heatmap of " + str(i),
+                            )
+                        except:
+                            self.print_message(
+                                "Re-check the parameters for --heatmap option",
+                                "w",
+                            )
+                except ValueError:
+                    # NOTE a temporary fix for cases when a wrong heatmap parameter was supplied
+                    self.print_message(
+                        f'"{i}" is an invalid option for heatmap',
+                        "w",
+                    )
+                    print("Valid options are:")
+                    print(", ".join(list(self.plate_results.columns)))
+
+            # clean up after plotting so that no parameters are carried over to downstream plottings
+            plt.close("all")
+
+        # generate plots of individual samples
+        # BUG for multi-dataset instance using --genfigs will overwrite images
+        if genpics:
+            self.print_message("Generating figures... This may take a while...", "i")
+            # depending on the value of parallelization either run a single or multi-processor command
+            if parallelization and n_jobs > 1:
+                # with picklable plotfig method:
+                Parallel(n_jobs=n_jobs)(
+                    delayed(self.plotfig)(output_path=output_path, wellID=i)
+                    for i in self.plate_fit.columns.values
+                )
+                # plot raw data of failed samples
+                if len(failed_samples) > 0:
+                    Parallel(n_jobs=n_jobs)(
+                        delayed(self.plotfig)(
+                            output_path=output_path,
+                            datatype="raw",
+                            wellID=i,
+                        )
+                        for i in failed_samples
+                    )
+            else:
+                for i in self.plate_fit.columns.values:
+                    # save to default path
+                    self.plotfig(output_path, i)
+
+                # plot failed samples, if any
+                # TODO add the derivative curve so that the plots look nicer
+                if len(failed_samples) > 0:
+                    for i in failed_samples:
+                        self.plotfig(output_path=output_path, datatype="raw", wellID=i)
+
+
+class MoltenProtFitMultiple:
+    """The main class in MoltenProt; contains one or more datasets (i.e. MoltenProtFit instances)
+    and coordinates their processing.
+    """
+
+    def __init__(
+        self,
+        scan_rate=None,
+        denaturant=None,
+        layout=None,
+        source=None,
+    ) -> None:
+        """Store the settings shared by all datasets; no data is loaded here.
+
+        Parameters
+        ----------
+        scan_rate
+            scan rate, stored as-is and handed to every dataset added later
+        denaturant : str
+            a unit of temperature (C, K) or a denaturant name (GuHCl, Urea)
+        layout : pd.DataFrame
+            annotation for all samples; a copy is kept in layout_raw as a backup
+        source : str
+            name of the file the data came from (currently not the real path)
+
+        All internal processing is done with temperature in Kelvins. Datasets are added
+        with a dedicated method; None defaults help re-create instances from JSON files
+        """
+        self.layout = layout
+        self.layout_raw = None if layout is None else layout.copy()
+        self.source, self.scan_rate, self.denaturant = source, scan_rate, denaturant
+        self.datasets = {}  # type: ignore[var-annotated]
+
+    def __getstate__(self):
+        """Return the instance __dict__, extended in place with two bound methods, as the state to pickle."""
+        output = self.__dict__
+        output["PrepareAndAnalyseSingle"] = self.PrepareAndAnalyseSingle
+        output["WriteOutputSingle"] = self.WriteOutputSingle
+        return output
+
+    def print_message(self, text, message_type):
+        """Echo a message to stdout and log it in protocolString of each dataset.
+
+        message_type must be "w" (warning) or "i" (information), otherwise
+        ValueError is raised.
+        """
+        if message_type not in ("w", "i"):
+            msg = f"Unknown message type '{message_type}'"
+            raise ValueError(msg)
+        prefix = "Warning: " if message_type == "w" else "Information: "
+        message = f"{prefix}{text} (All)"
+        print(message)
+        for dataset in self.datasets.values():
+            dataset.protocolString += message + "\n"
+
+    def GetAnalysisSettings(self):
+        """Get information on the analysis settings (shared settings and per-dataset model settings).
+
+        Returns:
+        -------
+        a tuple of two variables:
+            > dict with current analysis settings (the defaults until a dataset has been analysed)
+            > dataframe with columns "Dataset" and "Model" (compatible with QtTableWidget)
+        """
+        # NOTE currently all settings except for model are the same for all datasets
+        # start from default settings; every dataset is listed with model "santoro1988" at first
+        analysis_settings = analysis_kwargs(analysis_defaults)
+        model_settings = pd.Series(
+            index=self.GetDatasets(),
+            name="Model",
+            data="santoro1988",
+        )
+        model_settings.index.name = "Dataset"
+
+        for dataset_name, dataset in self.datasets.items():
+            if dataset.analysisHasBeenDone():
+                analysis_settings = analysis_kwargs(dataset.__dict__)
+                model_settings[dataset_name] = dataset.model
+            # unprocessed MPF may not have the model attribute, but "skip" must still be reported
+            if hasattr(dataset, "model") and dataset.model == "skip":
+                model_settings[dataset_name] = dataset.model
+
+        # reset index to comply with QtTableWidget
+        model_settings = model_settings.reset_index()
+
+        return (analysis_settings, model_settings)
+
+    def AddDataset(self, data, readout):
+        """Add an individual Dataset (as MP_fit instance in datasets attribute).
+
+        Parameters
+        ----------
+        data: pd.DataFrame
+            a ready-togo DataFrame with Index called "Temperature" or "Denaturant" (not implemented yet)
+        readout: str
+            a name for the dataset to be added; will be used to call internal data and thus has to be unique; spaces will be substituted with underscore
+
+        Notes:
+        -----
+        Raises RuntimeError if a dataset with the same name (after the space-to-underscore
+        substitution) is already present.
+        TODO add an option to indicate that the data is about aggregation (to rename to Tagg)
+        TODO set layouts here as well?
+        """
+        # normalise the name first, so that "a b" cannot silently overwrite an existing "a_b"
+        readout = readout.replace(" ", "_")
+        if readout in self.datasets:
+            msg = f"Readout '{readout}' is already present in this MoltenProtFitMultiple instance!"
+            raise RuntimeError(msg)
+
+        # NOTE parent_filename is only used in reports
+        self.datasets[readout] = MoltenProtFit(
+            data,
+            scan_rate=self.scan_rate,
+            input_type="from_xlsx",
+            parent_filename=self.source,
+            denaturant=self.denaturant,
+            readout_type=readout,
+        )
+
+    def DelDataset(self, todelete):
+        """Permanently drop one dataset from this instance.
+
+        Parameters
+        ----------
+        todelete
+            name of the dataset to remove
+
+        Notes:
+        -----
+        The data cannot be recovered afterwards. If the data should be retained, then set model to 'skip' in Analysis instead
+        """
+        if todelete not in self.GetDatasets():
+            self.print_message(f"Dataset '{todelete}' not found", "w")
+            return
+        del self.datasets[todelete]
+
+    def GetDatasets(self, no_skip=False):
+        """List the names of the datasets held by this instance, as a tuple.
+
+        Parameters
+        ----------
+        no_skip
+            if True, then datasets with model 'skip' will not be included
+        """
+        if not no_skip:
+            return tuple(self.datasets.keys())
+
+        # NOTE model attribute is only added after processing, so a dataset
+        # without it is never treated as skipped
+        return tuple(
+            name
+            for name, dataset in self.datasets.items()
+            if not (hasattr(dataset, "model") and dataset.model == "skip")
+        )
+
+    def UpdateLayout(self):
+        """Push the "master" layout of MPFM to every dataset (for GUI communication only).
+
+        Notes:
+        -----
+        BUG this method does not use the MPF.SetLayout, which makes code redundant
+        layouts are set only at SetAnalysisOptions state
+        """
+        for dataset in self.datasets.values():
+            dataset.layout = self.layout
+            # processed datasets also carry the layout info in plate_results
+            if hasattr(dataset, "plate_results"):
+                dataset.plate_results["Condition"] = self.layout["Condition"]
+
+    def ResetLayout(self):
+        """Restore the master layout in MPFM from layout_raw (recorded during parsing of XLSX in newer versions of moltenprot) and update all MPF instances."""
+        if self.layout_raw is None:
+            self.print_message("Attribute layout_raw is None, nothing to reset", "w")
+            return
+        # a copy is required: all edits to the layout would otherwise propagate
+        # to layout_raw, and it would not be "original" any more
+        self.layout = self.layout_raw.copy()
+        self.UpdateLayout()
+
+    def SetScanRate(self, scan_rate):
+        """Store a new scan rate (degC/min) in this instance and in all datasets."""
+        # too high precision is not relevant, keep two decimals
+        rounded = self.scan_rate = round(scan_rate, 2)
+        for dataset in self.datasets.values():
+            dataset.scan_rate = rounded
+
+    def RenameResultsColumns(self, which, mapping):
+        """Rename columns of plate_results and plate_results_stdev in one dataset.
+
+        E.g. in scattering data, the output is not Tm, but Tagg
+        Also, for chemical denaturation the columns can be completely different
+
+        Parameters
+        ----------
+        which : str
+            to which dataset to apply
+        mapping : dict
+            a dictionary with keys being original names and values being new names
+        """
+        message = f"Renaming results columns in dataset {which}\n{mapping}"
+        self.print_message(message, "i")
+        dataset = self.datasets[which]
+        dataset.plate_results.rename(columns=mapping, inplace=True)
+        dataset.plate_results_stdev.rename(columns=mapping, inplace=True)
+
+    def SetAnalysisOptions(self, which="all", printout=False, **kwargs):
+        """Set analysis options for one dataset or for all of them.
+
+        Parameters
+        ----------
+        which : str
+            To which dataset the options are applied; all means apply same settings for all datasets
+        printout : bool
+            print the settings of every dataset the options were applied to
+        **kwargs
+            args for MoltenProtFit.SetAnalysisOptions()
+        """
+        # Add layout-related options to kwargs
+        if kwargs.get("blanks"):
+            # check if the current layout has any blanks listed and remove those
+            self.layout.Condition = self.layout.Condition.replace("Blank", "")
+            try:
+                self.layout.loc[kwargs["blanks"], "Condition"] = "Blank"
+            except KeyError:
+                self.print_message(
+                    "One or more blank samples have invalid IDs, no blank info will be available",
+                    "w",
+                )
+                # add to all datasets' protocol string
+
+        # NOTE if the same sample is listed as blank and exclude, it will have only exclude in the end
+        # i.e. exclusion by user has higher priority
+        if kwargs.get("exclude"):
+            # check if the current layout has any excluded samples and remove those
+            self.layout.Condition = self.layout.Condition.replace("Ignore", "")
+            try:
+                self.layout.loc[kwargs["exclude"], "Condition"] = "Ignore"
+            except KeyError:
+                self.print_message(
+                    "One or more excluded samples have invalid IDs, no exclusion done",
+                    "w",
+                )
+
+        kwargs["layout"] = self.layout
+        kwargs["layout_input_type"] = "from_xlsx"
+        # NOTE the differences from 1x processing:
+        # the layout is shared, so we use self.layout, and also tell MoltenProtFit instance that the layout is already a DataFrame (using layout_input_type)
+        targets = self.GetDatasets() if which == "all" else (which,)
+        for name in targets:
+            self.datasets[name].SetAnalysisOptions(**kwargs)
+
+        # print per configured dataset: "all" is not a key of self.datasets and cannot be looked up
+        if printout:
+            for name in targets:
+                print(f"Data type is {name}")
+                self.datasets[name].printAnalysisSettings()
+
+    def PrepareAndAnalyseSingle(self, which):
+        """Prepare and then process the data of one dataset.
+
+        Parameters
+        ----------
+        which
+            dataset name
+        """
+        dataset = self.datasets[which]
+        dataset.PrepareData()
+        dataset.ProcessData()
+        # NOTE return statement is only needed for parallelized code (MoltenProtFitMultiple instance
+        # gets overwritten and computed results are not stored)
+        return dataset
+
+    def PrepareAndAnalyseAll(self, n_jobs=1):
+        """Run the processing pipeline on every dataset, optionally in parallel.
+
+        Parameters
+        ----------
+        n_jobs : int
+            how many parallel processes to start
+        """
+        names = self.GetDatasets()
+        if not (parallelization and n_jobs > 1):
+            for name in names:
+                self.PrepareAndAnalyseSingle(name)
+            return
+
+        # in the parallel case the processed datasets are returned by the jobs
+        # and have to be stored back into this instance under the same names
+        processed = Parallel(n_jobs=n_jobs)(
+            delayed(self.PrepareAndAnalyseSingle)(name) for name in names
+        )
+        self.datasets.update(zip(names, processed))
+
+    def CombineResults(self, outfile, tm_stdev_filt=-1, bs_filt=-1, merge_dup=False):
+        """Join all plate_results/stdev DataFrames and write to a single XLSX file.
+
+        Parameters
+        ----------
+        outfile : str
+            where to write the output (a full path)
+        tm_stdev_filt : float
+            samples with Tm stdev above this value will be discarded
+        bs_filt : float
+            samples with BS-factor below this value will be discarded
+        merge_dup : bool
+            whether to aggregate samples with identical annotations in layout
+
+        Notes:
+        -----
+        TODO: add more generic filtering options
+        """
+        analysis_tuple = self.GetDatasets()
+
+        # the context manager saves and closes the workbook on exit, also on errors;
+        # ExcelWriter.save() is not available any more in pandas >= 2.0
+        with pd.ExcelWriter(outfile) as writer:
+            for i in analysis_tuple:
+                # skip non-processed datasets (model=skip)
+                if self.datasets[i].model == "skip":
+                    continue
+
+                # in scattering data the output is not Tm, but Tagg
+                tm_key = "Tagg" if i == "Scattering" else "Tm"
+                output = self.datasets[i].CombineResults(
+                    tm_stdev_filt=tm_stdev_filt,
+                    bs_filt=bs_filt,
+                    merge_dup=merge_dup,
+                    # merge_stdev=merge_stdev, DEPRECATED?
+                    tm_key=tm_key,
+                )
+                # NOTE Excel sheets are now named identically to input dataset names
+                # sheet_name goes by keyword: passing it positionally is deprecated in pandas
+                output.to_excel(writer, sheet_name=i)
+
+    def GenerateReport(self, heatmap_cmap, template_path=None):
+        """Creates an interactive HTML report (as a string).
+
+        Parameters
+        ----------
+        heatmap_cmap
+            matplotlib colormap for heatmap
+        template_path
+            the HTML template; None selects resources/report.template next to this module
+
+        Returns:
+        -------
+        A string made from report template where placeholders were substituted to actual HTML code
+        """
+        # use default template if none provided
+        if template_path is None:
+            template_path = os.path.join(__location__, "resources/report.template")
+
+        # read the html template and convert it to a string.Template instance
+        with open(template_path) as template_file:
+            template = Template(template_file.read())
+
+        heatmap_table = ""
+        buttons = ""
+        # name of the first processed dataset; stays empty when every dataset has
+        # model "skip", so that the substitution below never hits an unbound name
+        first_heatmap = ""
+
+        # for the first heatmap display is table, for the rest it is none
+        display_heatmap = "table"
+
+        # for a single-dataset MPFm do not show buttons
+        display_buttons = "none" if len(self.datasets) == 1 else ""
+
+        # cycle through all datasets and get glue up strings to the starting button or heatmap string
+        for dataset_name, dataset in self.datasets.items():
+            # skip non-processed datasets
+            if dataset.model != "skip":
+                heatmap_table += dataset.html_heatmap(heatmap_cmap, display_heatmap)
+                if display_heatmap == "table":
+                    display_heatmap = "none"
+                    first_heatmap = dataset_name
+                buttons += dataset.html_button()
+
+        return template.substitute(
+            FILE=self.source,
+            FIRST_HEATMAP=first_heatmap,
+            HEATMAP_TABLE=heatmap_table,
+            DISPLAY_BUTTONS=display_buttons,
+            BUTTONS=buttons,
+            VERSION=__version__,
+            TIMESTAMP=strftime("%c"),
+        )
+
+    def WriteOutputSingle(
+        self,
+        which,
+        outfolder,
+        subfolder=False,
+        **kwargs,  # keyword args for WriteOutput
+    ):
+        """Write output to disc for a single dataset.
+
+        Parameters
+        ----------
+        which : dataset to process
+        outfolder : folder for output
+        heatmap_cmap : str
+            matplotlib colormap for heatmap
+        xlsx : bool
+            write output in XLSX format (default is CSV)
+        genpics : bool
+            create figures for samples
+        heatmaps : list
+            create heatmaps for the column in list
+        subfolder : bool
+            write output in outfolder (default) or create a subfolder called "which_resources"
+        n_jobs : int
+            how many parallel processes can be spawned
+        no_data : bool
+            no data output
+
+        Notes:
+        -----
+        * No output generated for datasets with model "skip"
+        * Do not use this method directly, use WriteOutputAll instead
+        """
+        dataset = self.datasets[which]
+        if dataset.model == "skip":
+            return
+
+        if subfolder:
+            outfolder = os.path.join(outfolder, which + "_resources")
+            os.makedirs(outfolder, exist_ok=True)
+
+        # HACK to minimize edits to MoltenProtFit assignment of outfolder is done via the attribute
+        dataset.resultfolder = outfolder
+        try:
+            dataset.WriteOutput(resources_prefix=which, **kwargs)
+        finally:
+            # delete the temporary attribute completely, even when writing failed
+            del dataset.resultfolder
+
+    def WriteOutputAll(
+        self,
+        outfolder,
+        # report,
+        xlsx=False,
+        genpics=False,
+        heatmaps=[],
+        report_format=None,
+        heatmap_cmap=defaults["heatmap_cmap"],
+        n_jobs=1,
+        no_data=False,
+        session=False,
+    ):
+        """Write output to disc for all associated datasets.
+
+        Parameters
+        ----------
+        outfolder : str
+            the folder where report.html will be placed and per-dataset subfolders
+        xlsx : bool
+            write output in XLSX format (default is CSV)
+        report_format : None or str 'pdf', 'html', 'xlsx'
+            'html' writes report.html and forces xlsx, genpics and data output on;
+            'xlsx' writes report.xlsx via CombineResults; 'pdf' adds a per-dataset PDF report
+        no_data : bool
+            do not output any data (overridden by report_format='html')
+        n_jobs : int
+            how many output processes to run
+        genpics : bool
+            create figures for samples
+        heatmaps : list
+            create heatmaps for the column in list
+        heatmap_cmap : str
+            matplotlib colormap for heatmap
+        session : bool
+            save MP session in JSON format
+        """
+        # generate and populate a dict of output settings
+        output_kwargs = {}
+
+        if len(self.datasets) == 1:
+            # for single-dataset instances (and no reports planned)
+            # write everything to outdir
+            output_kwargs["subfolder"] = False
+
+        if xlsx:
+            output_kwargs["xlsx"] = True
+        if heatmaps:
+            # heatmaps should be a list with one or more elements
+            output_kwargs["heatmaps"] = heatmaps
+        if genpics:
+            output_kwargs["genpics"] = True
+
+        output_kwargs["heatmap_cmap"] = heatmap_cmap
+        output_kwargs["no_data"] = no_data
+
+        # NOTE since reports are pre-defined data bundles, they may override some of the previous settings
+        if report_format == "html":
+            # generate a reporthtml string
+            reporthtml = self.GenerateReport(heatmap_cmap=heatmap_cmap)
+            # write all datasets to dedicated subfolders
+            output_kwargs["subfolder"] = True
+            # write the HTML of the report to outdir
+            with open(os.path.join(outfolder, "report.html"), "w") as file:
+                file.write(reporthtml)
+            output_kwargs["xlsx"] = True
+            output_kwargs["genpics"] = True
+            output_kwargs["no_data"] = False
+        elif report_format == "xlsx":
+            self.CombineResults(os.path.join(outfolder, "report.xlsx"), -1, -1, False)
+        elif report_format == "pdf":
+            output_kwargs["pdf"] = True
+
+        # write output in parallel or serially
+        if parallelization and n_jobs > 1:
+            if len(self.GetDatasets()) == 1:
+                # if there is only a single associated dataset, it makes sense to enable parallel figure plotting
+                self.WriteOutputSingle(
+                    self.GetDatasets()[0],
+                    outfolder,
+                    n_jobs=n_jobs,
+                    **output_kwargs,
+                )
+            else:
+                Parallel(n_jobs=n_jobs)(
+                    delayed(self.WriteOutputSingle)(i, outfolder, **output_kwargs)
+                    for i in self.GetDatasets()
+                )
+        else:
+            # resultfolder was cleaned previously or created fresh so we just have to supply a proper prefix
+            for i in self.GetDatasets():
+                self.WriteOutputSingle(i, outfolder, **output_kwargs)
+
+        # NOTE JSON dumping must be done _AFTER_ all parallelized jobs!
+        if session:
+            mp_to_json(self, output=os.path.join(outfolder, "MP_session.json"))
+
+
+class MoltenProtFitMultipleLE(MoltenProtFitMultiple):
+    """A special class to handle the lumry_eyring model
+    The main difference is that Scattering signal is required and
+    the datasets are processed sequentially.
+    """
+
+    def PrepareAndAnalyseAll(self, n_jobs=1):
+        """First the Scattering data is fit (model "irrev") to get Ea and Tf for reaction U->A
+        Then they are supplied as fixed parameters to fit reaction N <-kF, kR -> U
+        in every other dataset with the lumry_eyring model. Sequential: n_jobs is not used.
+        """
+        # rename dataset if refolded data was used
+        if "Scattering (Unfolding)" in self.GetDatasets():
+            self.datasets["Scattering"] = self.datasets.pop("Scattering (Unfolding)")
+
+        # check if Scattering signal is present
+        if "Scattering" not in self.GetDatasets():
+            msg = "lumry_eyring model requires a Scattering dataset"
+            raise ValueError(msg)
+
+        # run analysis of Scattering data with irrev model (has to be changed from whatever was supplied)
+        self.datasets["Scattering"].model = "irrev"
+        self.PrepareAndAnalyseSingle("Scattering")
+        # cycle through all other datasets and add fixed parameters
+        for dataset in self.GetDatasets():
+            if dataset != "Scattering":
+                # NOTE in GUI it is possible that some datasets are not skipped, are not Scattering and
+                # do not have lumry_eyring model, for those SetFixedParameters will cause AttributeError,
+                # because non-LE models do not use this feature
+                if self.datasets[dataset].model == "lumry_eyring":
+                    self.datasets[dataset].SetFixedParameters(
+                        self.datasets["Scattering"]
+                        .plate_results.loc[:, ["Tf_fit", "Ea_fit"]]
+                        .T,
+                    )
+                self.PrepareAndAnalyseSingle(dataset)
+
+
+class MoltenProtFitMultipleRefold(MoltenProtFitMultiple):
+    """Contains special tweaks to work with refolding data from Prometheus NT.48.
+
+    * Analyse all datasets, but not the refolding datasets (addDataset for normal unfolding, addRefoldDataset
+    for refolding datasets, maintain them separately, but use the same keys in datasets dict)
+    * take the baseline parameters from unfolding datasets and draw fraction refolded
+    * for fraction refolded generate a separate report HTML file
+    * how to show this in GUI?
+
+    Eventually:
+    * add fraction_irreversibly_unfolded to the equation and fit refolding data
+    using the parameters from unfolding as starting values
+    """
+
+    pass
+
+
+### Data parsers
+"""
+Functions to create a MoltenProtFitMultiple instance from a specific
+experimental data file.
+"""
+
+
+def _csv_helper(filename, sep, dec):
+    """Pre-processing steps for reading CSV files.
+
+    Returns:
+    -------
+    pd.DataFrame with Temperature as index (only the first row of each duplicated temperature is kept)
+    """
+    try:
+        data = pd.read_csv(
+            filename,
+            sep=sep,
+            decimal=dec,
+            index_col="Temperature",
+            encoding="utf-8",
+        )
+    except ValueError as e:
+        print(e)
+        msg = "Input *.csv file is invalid!\nCheck if column called 'Temperature' exists and separators are specified correctly"
+        raise ValueError(msg) from e
+
+    # a boolean mask keeps only the first row per temperature; selecting by the
+    # unique labels with .loc would return every duplicated row again
+    duplicated = data.index.duplicated(keep="first")
+    if duplicated.any():
+        print(
+            "Warning: Temperature scale contains duplicates, all but first occurence are dropped",
+        )
+        data = data.loc[~duplicated, :]
+    return data
+
+
+def parse_plain_csv(
+    filename,
+    scan_rate=None,
+    sep=defaults["sep"],
+    dec=defaults["dec"],
+    denaturant=defaults["denaturant"],
+    readout=defaults["readout"],
+    layout=defaults["layout"],
+):
+    """Parse a standard CSV file with columns Temperature, A1, A2, ...
+
+    Parameters
+    ----------
+    filename : str
+        path to csv file
+    scan_rate
+        passed unchanged to the MoltenProtFitMultiple instance
+    sep,dec - csv import parameters
+    denaturant : str
+        temperature in input file assumed  to be Celsius (default value C), but could be also in K
+    readout : str
+        name for the experimental technique (e.g. CD or F330), will be used as key in dataset dict
+    layout : str or None
+        specify a special *.csv file which defines the plate layout (i.e. what conditions are in each sample)
+
+    Returns:
+    -------
+    MoltenProtFitMultiple instance
+
+    #TODO add removal of zeros in A01, A02, etc
+    """
+    # read the CSV into a DataFrame
+    data = _csv_helper(filename, sep, dec)
+
+    # read layout (if provided)
+    if layout is not None:
+        try:
+            layout = pd.read_csv(layout, index_col="ID", encoding="utf_8")
+        except Exception:  # not a bare except: Ctrl-C and SystemExit must still propagate
+            print("Warning: unsupported layout format! No layout info will be available")
+            layout = None
+
+    if layout is None:
+        # if no layout provided or could not be read, create an empty layout DataFrame
+        layout = pd.DataFrame(index=alphanumeric_index, columns=["Condition"])
+
+    # initialize and return a MoltenProtFitMultiple instance
+    output = MoltenProtFitMultiple(
+        scan_rate=scan_rate,
+        denaturant=denaturant,
+        layout=layout,
+        source=filename,
+    )
+    output.AddDataset(data, readout)
+    return output
+
+
+def parse_spectrum_csv(
+    filename,
+    scan_rate=None,
+    denaturant=defaults["denaturant"],
+    sep=defaults["sep"],
+    dec=defaults["dec"],
+    readout=defaults["readout"],
+):
+    """Parse CSV file with columns Temperature,wavelengths...
+
+    Parameters
+    ----------
+    filename : str
+        path to csv file
+    scan_rate, sep, dec - scan rate handed to MoltenProtFitMultiple; csv import parameters
+    denaturant : str
+        temperature in input file assumed to be Celsius (default value C), but could be also in K
+    readout : str
+        name for the experimental technique (e.g. CD or F330), will be used as key in dataset dict
+
+    Returns:
+    -------
+    MoltenProtFitMultiple instance
+
+    Notes:
+    -----
+    * Temperature axis is not sorted
+    * Layouts are generated automatically from column names (assumed to be respective wavelengths)
+    """
+    data = _csv_helper(filename, sep, dec)
+
+    # if data is too big, take a random subset (no random_state is passed, so the pick differs between runs)
+    if len(data.columns) > 96:
+        print(
+            f"Warning: too many wavelengths in the spectrum ({len(data.columns)}), selecting random 96",
+        )
+        data = data.sample(n=96, axis=1)
+    # to be on the safe side, sort columns ascending
+    data = data.loc[:, sorted(data.columns)]
+    # transpose so that each former column is a row, then give every row an alphanumeric ID
+    data = data.T
+    data["ID"] = list(alphanumeric_index[: len(data)])
+    data.index.name = "Condition"
+    data.reset_index(inplace=True)
+
+    # set ID as index
+    data.set_index("ID", inplace=True)
+    # build the layout over the full alphanumeric index, copy the former column
+    # names into its "Condition" column, then drop that column from the main df
+    layout = pd.DataFrame(index=alphanumeric_index, columns=["Condition"])
+    layout.index.name = "ID"
+    layout.loc[data.index, "Condition"] = data.loc[:, "Condition"].copy()
+    data.drop(["Condition"], axis=1, inplace=True)
+    data = data.T
+
+    # initialize and return a MoltenProtFitMultiple instance
+    output = MoltenProtFitMultiple(
+        scan_rate=scan_rate,
+        denaturant=denaturant,
+        layout=layout,
+        source=filename,
+    )
+    output.AddDataset(data, readout)
+    return output
+
+
+def parse_prom_xlsx(filename, raw=False, refold=False, LE=False, deltaF=True):
+    """Parse a processed file from Prometheus NT.48. In these files temperature
+    is always in Celsius and the readouts are more or less known. Layout is read
+    from the overview sheet.
+
+    Parameters
+    ----------
+    filename : str
+        path to xlsx file
+    raw : bool
+        if the data is "raw" or "processed" in terms of the manufacturer's GUI
+    refold : bool
+        if refolding ramp was used (default False)
+    LE : bool
+        instead of standard instance, create the one with Lumry-Eyring model
+    deltaF : bool
+        compute an alternative signal-enhanced readout: F350-F330 difference
+        it is an extensive readout (proportional to protein conc, like F330 or F350),
+        which also makes the transitions more pronounced (like Ratio)
+
+    Returns:
+    -------
+    MoltenProtFitMultiple instance (MoltenProtFitMultipleLE if LE is True); ValueError is raised without Overview sheet or data
+
+    Notes:
+    -----
+    * Parsing relies on sheet names in English
+    * Current implementation can successfully parse raw XLSX as long as there are less than 96 data columns (which is 3 times the number of capillaries). The data will be contaminated with straight lines of temperature and time
+
+    Todo:
+    * the sheets have a standardized order in the file: Overview, Readout1_(unfolding), Readout1_(unfolding)_derivative ...,  Readout1_(refolding), Readout1_(refolding)_derivative ... ; this can be used to parse data independently of sheet labels; also, the sheets containing "deriv" can be auto-excluded
+    """
+    # force the input file to have absolute path (to be stored in JSON session)
+    filename = os.path.abspath(filename)
+
+    # read the whole Excel file (sheet_name=None) - get a dict with one DataFrame per sheet
+    input_xlsx = pd.read_excel(filename, None)
+
+    # parse the layout
+    # layout contains 3 columns: Condition, Capillary and dCp
+    # the capillary info can be appended during report generation, but not earlier (needed for Blanks/References etc)
+    # NOTE if the user manipulated the Overview sheet, additional non-data rows can be read in and produce a messy layout DF. This doesn't seem to affect the processing
+    if "Overview" not in input_xlsx:
+        msg = f"Input file {filename} contains no overview sheet"
+        raise ValueError(msg)
+
+    layout = input_xlsx["Overview"]
+    layout.reset_index(inplace=True)
+    # read scan rate from first row in column "Temperature Slope"
+    # NOTE without conversion to float scan_rate (even if 1) will be saved to JSON as "null"!
+    scan_rate = float(layout["Temperature Slope"].iloc[0])
+
+    layout = layout.reindex(["Capillary", "Sample ID", "dCp"], axis=1)
+    layout.rename(columns={"Sample ID": "Condition"}, inplace=True)
+    # concatenate A1-H12 and description, then use the "ID" column as the new index
+    layout = pd.concat([layout, alphanumeric_index], axis=1)
+    layout.set_index("ID", inplace=True)
+
+    # initialize a MoltenProtFitMultiple instance
+    if LE:
+        output = MoltenProtFitMultipleLE(
+            scan_rate=scan_rate,
+            layout=layout,
+            denaturant="C",
+            source=filename,
+        )
+    else:
+        output = MoltenProtFitMultiple(
+            scan_rate=scan_rate,
+            layout=layout,
+            denaturant="C",
+            source=filename,
+        )
+
+    # cycle through available readouts and add them to MPMultiple
+    if refold:
+        # Full list of readouts, currently only unfolding can be processed
+        # TODO add a class to process refolding data in conjunction with unfolding
+        readouts = (
+            "Ratio (Unfolding)",
+            "330nm (Unfolding)",
+            "350nm (Unfolding)",
+            "Scattering (Unfolding)",
+            "Ratio (Refolding)",
+            "330nm (Refolding)",
+            "350nm (Refolding)",
+            "Scattering (Refolding)",
+        )
+        output.print_message(
+            "Currently refolding data is treated separately from unfolding data",
+            "w",
+        )
+    else:
+        readouts = ("Ratio", "330nm", "350nm", "Scattering")
+
+    # NOTE to avoid multiple checks of the scan rate (temp and time scale are the same for all readouts)
+    refined_scan_rate = None
+    for i in readouts:
+        if i in list(input_xlsx.keys()):
+            data = input_xlsx[i]
+            """
+            Convert the read sheet from *.xlsx
+            The first column ("Unnamed: 0") contains several rows with NaN values that correspond to one or more columns of the annotations; there is at least one Called Sample ID, and then additional user-defined names. Those have to be removed
+            The next row contains the value "u'Time [s]'", and it becomes the first row once the previous operation is done.
+            """
+            data = data[data.iloc[:, 0].notna()]
+            data = data.iloc[1:, :]
+
+            if raw:
+                # warn the user that there is a potentially harmful data modification
+                output.print_message(
+                    "Import of raw data requires interpolation to have all readings on the same temperature scale, i.e. the data gets irreversibly modified",
+                    "w",
+                )
+                # in proc data currently there will be: shared time, shared temp, readings
+                # in raw data there are 3 columns for each sample: time, temperature, readings
+                # NOTE in some older versions of the raw data the time and temperature are actually the same!
+                # care must be taken if scan_rate is determined from such files
+
+                # extract readings, temperatures and times
+                readings = data.iloc[:, 2::3].copy()
+                temps = data.iloc[:, 1::3]
+                times = data.iloc[:, 0::3]
+                # the time and temperature of the first sample will be used in the final scale
+                time_scale = times.iloc[:, 0].astype(float)
+                temp_scale = temps.iloc[:, 0].astype(float)
+                # cycle through all samples and perform interpolation
+                for col_ix in range(len(readings.columns)):
+                    r_col = readings.columns[col_ix]
+                    t_col = temps.columns[col_ix]
+                    # interpolation function
+                    interpolator = interp1d(
+                        temps[t_col],
+                        readings[r_col],
+                        bounds_error=False,
+                    )
+                    # readings re-evaluated on the temperature scale of the first sample
+                    readings.loc[:, r_col] = interpolator(temp_scale)
+                # add time and temperature of the first sample to the output data
+                data = pd.concat([time_scale, temp_scale, readings], axis=1)
+
+            # determine true scan rate by running a linear fit of temperature vs time
+            if refined_scan_rate is None:
+                temp_vs_time = data.iloc[:, :2].astype(float).dropna()
+                slope, intercept = np.polyfit(
+                    temp_vs_time.iloc[:, 0],
+                    temp_vs_time.iloc[:, 1],
+                    1,
+                )
+                refined_scan_rate = slope * 60  # convert degC/sec to degC/min
+
+            # remove the first column with time
+            data.drop(data.columns[0], inplace=True, axis=1)
+            # rename the first column to "Temperature"
+            data.rename(columns={data.columns[0]: "Temperature"}, inplace=True)
+            # set Temperature as the index column
+            data.set_index("Temperature", inplace=True)
+            # convert column names to A1-D12
+            data.columns = list(alphanumeric_index.iloc[0 : len(data.columns)])
+            # make every column numeric; cells that cannot be parsed become NaN (errors="coerce")
+            data = data.apply(pd.to_numeric, errors="coerce")
+
+            # Create a MoltenProtFit instance with this DataFrame as data source
+            output.AddDataset(data, i)
+        else:
+            output.print_message(f"Readout {i} not found", "w")
+
+    if deltaF:
+        # check if F330 and F350 are available; NOTE(review): refolding keys use "_" here but " " in readouts above - presumably AddDataset renames them, confirm
+        if {"330nm", "350nm"}.issubset(output.GetDatasets()):
+            readout_ids = ("330nm", "350nm")
+        elif {"330nm_(Unfolding)", "350nm_(Unfolding)"}.issubset(
+            output.GetDatasets(),
+        ):
+            readout_ids = ("330nm_(Unfolding)", "350nm_(Unfolding)")
+        else:
+            output.print_message(
+                "Cannot compute readout deltaF: either F330 or F350 is missing in the input data",
+                "w",
+            )
+            readout_ids = None
+
+        if readout_ids is not None:
+            # HACK Temperature in other datasets is already in Kelvins, but denaturant is set to C
+            # to prevent adding extra 273.15 to index temporarily set denaturant to K
+            output.denaturant = "K"
+            output.AddDataset(
+                output.datasets[readout_ids[1]].plate
+                - output.datasets[readout_ids[0]].plate,
+                "deltaF",
+            )
+            output.denaturant = "C"
+
+    # Check if any datasets could be properly added
+    if len(output.datasets) < 1:
+        msg = f"Input file {filename} contains no data"
+        raise ValueError(msg)
+
+    # warn about a large nominal/estimated mismatch, then always store the estimated scan rate
+    if abs(refined_scan_rate - output.scan_rate) > 0.2:
+        # in range 1-7 degC/min the difference between nominal and true rate is less than 0.2 deg/min
+        output.print_message(
+            f"The difference between nominal ({output.scan_rate}) and estimated ({refined_scan_rate}) scan rate >0.2 degrees/min",
+            "w",
+        )
+    output.SetScanRate(refined_scan_rate)
+
+    return output
+
+
+def mp_from_json(input_file):
+    """Load a JSON session file and return the object rebuilt by the deserialize hook.
+
+    Parameters
+    ----------
+    input_file
+        path of the JSON file to read
+    Notes:
+    -----
+    BUG column ordering is messed up after JSON I/O
+    """
+    with open(input_file) as session_fh:
+        restored = json.load(session_fh, object_hook=deserialize)
+    return restored
+
+
+def mp_to_json(object_inst, output=None):
+    """Serialize a MoltenProtFit/MPFMultiple instance to JSON.
+
+    Parameters
+    ----------
+    object_inst :
+        MP instance to be converted to JSON
+    output : string or None
+        None - nothing is written, the JSON text is returned instead
+        'self' - write MP_session.json inside object_inst.resultfolder
+        any other string - used as the path of the file to write
+
+    Returns:
+    -------
+    string
+        only if output parameter is None
+
+    Notes:
+    -----
+    BUG Column sorting is usually A1 A2..., but after json i/o it is A1 A10...
+    """
+    # indent=4 keeps the JSON human-readable in both branches
+    if output is None:
+        return json.dumps(object_inst, default=serialize, indent=4)
+
+    # DELETE the 'self' shortcut is only useful for old MPFMultiple;
+    # every other value is taken as the user-provided file location
+    if output == "self":
+        target = os.path.join(object_inst.resultfolder, "MP_session.json")
+    else:
+        target = output
+
+    with open(target, "w") as json_fh:
+        json.dump(object_inst, json_fh, default=serialize, indent=4)
+
+    # A gzip-compressed variant was considered: the output file is smaller,
+    # but the process itself is slower
+    #     with gzip.GzipFile(self.resultfolder+'/MP_session.json.gz', 'w') as fout:
+    #         fout.write(json.dumps(self, default=serialize, indent=4))
+
+    return None
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/models.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/models.py
new file mode 100644
index 0000000..ac1ec99
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/models.py
@@ -0,0 +1,658 @@
+"""Copyright 2020,2021 Vadim Kotov, Thomas C. Marlovits.
+
+This file is part of MoltenProt.
+
+MoltenProt is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+MoltenProt is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with MoltenProt.  If not, see <https://www.gnu.org/licenses/>.
+"""
+"""
+NOTE
+This file formalizes models used in MoltenProt fitting (MPModel class)
+User-defined models can be added here
+"""
+
+# finding number of arguments in a function
+from inspect import signature
+
+import numpy as np
+
+# numeric integration
+from scipy.integrate import solve_ivp
+
+### Constants
+R = 8.314  # universal gas constant
+T_std = 298.15  # standard temperature, in Kelvins
+
+
+class MoltenProtModel:
+    # template class for the models below; the dummy function is defined at class level,
+    # because it does not make sense for the instances to be able to have a different function
+    # the first argument HAS to be the X-axis (independent var)
+    fun = lambda x, k, b: k * x + b
+
+    # same applies for the model description - no point to make an instance-specific value
+    _description = "A dummy MoltenProt model"
+
+    # since a single class is created for each model, then the long name is already in the class name
+    # here we can also add a short name used in the main module
+    short_name = "model_template"
+
+    # what measure to use in final sorting - calculated in MoltenProtFit
+    # NOTE all measures are selected in such a way that higher values correspond to higher stability
+    # if None, then final sorting is skipped
+    sortby = None
+
+    def __init__(self, scan_rate=None) -> None:
+        """In a general case scan rate is not relevant, so it is set to None
+        For kinetic data it needs to be set for sure.
+        """
+        self.scan_rate = scan_rate
+
+    def __repr__(self) -> str:
+        """For interactive prompts or print() - show the model description."""
+        return self._description
+
+    def __str__(self) -> str:
+        """When converting to a string return the short name."""
+        return self.short_name
+
+    def param_names(self):
+        """Return parameter names encoded in the function declaration as a list."""
+        params = signature(self.fun).parameters
+        # signature() of the bound self.fun already omits its first argument; [1:] drops the next one (the X-axis in subclass models)
+        return list(params)[1:]
+
+    def param_init(self, input_data=None):
+        """Return starting parameters based on the input values
+        input_data must be a pd.Series with an index (e.g. Temperature)
+        this template always returns None, i.e. the parameters should be guessed by curve_fit.
+        """
+        if input_data is None:
+            return
+
+    def param_bounds(self, input_data=None):
+        """Return the bounds based on the input data.
+        input_data must be a pd.Series with an index (e.g. Temperature)
+        returns (-np.inf, np.inf) without input_data; this template returns None when data is given.
+        """
+        if input_data is None:
+            return (-np.inf, np.inf)
+        return None
+
+
+class EquilibriumTwoState(MoltenProtModel):
+    short_name = "santoro1988"
+    _description = "N <-> U"
+    sortby = "dG_std"  # type: ignore[assignment]
+
+    # signal = (native baseline + unfolded baseline * K) / (1 + K), where the baselines are
+    # kN*T + bN and kU*T + bU, and K = exp(dHm / R * (1 / Tm - 1 / T))
+    # NOTE parameter names do matter. If kN,bN,kU,bU and Tm are present, then _estimate_baseline routine
+    # will be run by MoltenProtFit instance to get the best possible starting values
+    def fun(self, T, kN, bN, kU, bU, dHm, Tm):
+        return (kN * T + bN + (kU * T + bU) * np.exp(dHm / R * (1 / Tm - 1 / T))) / (
+            1 + np.exp(dHm / R * (1 / Tm - 1 / T))
+        )
+
+    def param_bounds(self, input_data=None):
+        # if no data supplied, run the default action from the master class
+        # otherwise dHm is limited to 60000..4000000 and Tm to the range of the data index
+        if input_data is None:
+            return super().param_bounds(None)
+        else:
+            return (
+                (-np.inf, -np.inf, -np.inf, -np.inf, 60000, min(input_data.index)),
+                (np.inf, np.inf, np.inf, np.inf, 4000000, max(input_data.index)),
+            )
+
+    def param_init(self, input_data=None):
+        # Initial parameters (kN, bN, kU, bU, dHm, Tm) - pre baseline has no intercept and 45 degree slope
+        # a good starting Tm is just the middle of the range (or 0 if no data provided); the guess
+        # must be min + (max - min) / 2 so that it stays inside the Tm bounds from param_bounds
+        # NOTE for custom models any parameter initialization code should be implemented here
+        if input_data is None:
+            return (1, 0, 2, 0, 100000, 0)
+        else:
+            return (
+                1,
+                0,
+                2,
+                0,
+                100000,
+                min(input_data.index)
+                + (max(input_data.index) - min(input_data.index)) / 2.0,
+            )
+
+
+class EquilibriumThreeState(MoltenProtModel):
+    short_name = "santoro1988i"
+    _description = "N <-> I <-> U"
+    # in theory total stability of the protein is the sum of stabilities of N and I
+    sortby = "dG_comb_std"  # type: ignore[assignment]
+
+    def fun(self, T, kN, bN, kU, bU, kI, dHm1, T1, dHm2, dT2_1):
+        # dT2_1 = T2 - T1, i.e. the distance between the two transitions
+        return (
+            kN * T
+            + bN
+            + kI * np.exp(dHm1 / R * (1 / T1 - 1 / T))
+            + (kU * T + bU)
+            * np.exp(dHm1 / R * (1 / T1 - 1 / T))
+            * np.exp(dHm2 / R * (1 / (T1 + dT2_1) - 1 / T))
+        ) / (
+            1
+            + np.exp(dHm1 / R * (1 / T1 - 1 / T))
+            + np.exp(dHm1 / R * (1 / T1 - 1 / T))
+            * np.exp(dHm2 / R * (1 / (T1 + dT2_1) - 1 / T))
+        )
+
+    def param_bounds(self, input_data=None):
+        # TESTING preliminary results show that no limits for dHm are better in intermediate mode
+        if input_data is None:
+            return super().param_bounds(None)
+        else:
+            # by definition T2 follows T1, so dT2_1 is > 0
+            # the upper bound for dT2_1 is 1/2 of the full temperature range (i.e. the limit on max distance between the two Tms)
+            return (
+                (
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    min(input_data.index),
+                    -np.inf,
+                    0,
+                    # NOTE(review): an older note here said "allow dT2_1 to be +/-", but the lower bound above is 0
+                ),
+                (
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    max(input_data.index),
+                    np.inf,
+                    (max(input_data.index) - min(input_data.index)) / 2,
+                ),
+            )
+
+    def param_init(self, input_data=None):
+        # Initial parameters - pre baseline has no intercept and a 45 degree slope
+        # For dT2_1 we start from the assumption that dT2_1=0, i.e. there is no 2nd transition
+        if input_data is None:
+            return (1, 0, 2, 0, 1, 100000, 0, 100000, 0)
+        else:
+            # T1 is heuristically placed in the middle of the temp range
+            temp_range = max(input_data.index) - min(input_data.index)
+            return (
+                1,
+                0,
+                2,
+                0,
+                1,
+                100000,
+                min(input_data.index) + 0.5 * temp_range,
+                100000,
+                0,
+            )
+
+
+class EmpiricalTwoState(MoltenProtModel):
+    short_name = "santoro1988d"
+    _description = "Same as santoro1988, but fits Tm and T_onset"
+    sortby = "T_eucl"  # type: ignore[assignment]
+    # NOTE onset threshold is hard-coded to 0.01, i.e. onset point is 1% unfolded
+    onset_threshold = 0.01
+
+    def fun(self, T, kN, bN, kU, bU, T_onset, Tm):
+        return (
+            kN * T
+            + bN
+            + (kU * T + bU)
+            * np.exp(
+                (T - Tm)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset - Tm),
+            )
+        ) / (
+            1
+            + np.exp(
+                (T - Tm)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset - Tm),
+            )
+        )
+
+    def param_bounds(self, input_data=None):
+        if input_data is None:
+            return super().param_bounds(None)
+        else:
+            return (
+                (
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    min(input_data.index),
+                    min(input_data.index),
+                ),
+                (
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    max(input_data.index) - 2,
+                    max(input_data.index),
+                ),
+            )
+
+    def param_init(self, input_data=None):
+        # Initial parameters (kN, bN, kU, bU, T_onset, Tm) - pre baseline has no intercept and a 45 degree slope
+        if input_data is None:
+            return (1, 0, 2, 0, 1, 2)
+        else:
+            return (
+                1,
+                0,
+                2,
+                0,
+                min(input_data.index) + 10,
+                (min(input_data.index) + max(input_data.index)) / 2.0,
+            )
+
+
+class EmpiricalThreeState(MoltenProtModel):
+    short_name = "santoro1988di"
+    _description = "Same as santoro1988i, but fits Tm and T_onset"
+    # similar to thermodynamic 3-state model: sum up Euclidean temperature distance
+    # for both reaction steps
+    sortby = "T_eucl_comb"  # type: ignore[assignment]
+    # NOTE onset threshold is hard-coded to 0.01, i.e. onset point is 1% unfolded
+    onset_threshold = 0.01
+
+    def fun(self, T, kN, bN, kU, bU, kI, T_onset1, T1, T_onset2, T2):
+        return (
+            kN * T
+            + bN
+            + kI
+            * np.exp(
+                (T - T1)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset1 - T1),
+            )
+            + (kU * T + bU)
+            * np.exp(
+                (T - T2)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset2 - T2),
+            )
+            * np.exp(
+                (T - T1)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset1 - T1),
+            )
+        ) / (
+            1
+            + np.exp(
+                (T - T1)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset1 - T1),
+            )
+            + np.exp(
+                (T - T2)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset2 - T2),
+            )
+            * np.exp(
+                (T - T1)
+                * np.log(self.onset_threshold / (1 - self.onset_threshold))
+                / (T_onset1 - T1),
+            )
+        )
+
+    def param_bounds(self, input_data=None):
+        if input_data is None:
+            return super().param_bounds(None)
+        else:
+            return (
+                [
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    -np.inf,
+                    min(input_data.index),
+                    min(input_data.index),
+                    min(input_data.index),
+                    min(input_data.index),
+                ],
+                [
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    np.inf,
+                    max(input_data.index),
+                    max(input_data.index),
+                    max(input_data.index),
+                    max(input_data.index),
+                ],
+            )
+
+    def param_init(self, input_data=None):
+        # Initial parameters - pre baseline has no intercept and a 45 degree slope
+        if input_data is None:
+            return (1, 0, 2, 0, 1, 1, 1, 1, 1)
+        else:
+            temp_range = max(input_data.index) - min(input_data.index)
+            # with these starting values first transition is supposed to start in the beginning of the curve
+            # and the second transition starts in the end of the curve. Then they should meet
+            return (
+                1,
+                0,
+                2,
+                0,
+                1,
+                min(input_data.index) + 0.2 * temp_range,
+                min(input_data.index) + 0.4 * temp_range,
+                max(input_data.index) - 0.4 * temp_range,
+                max(input_data.index) - 0.2 * temp_range,
+            )
+
+
+class IrreversibleTwoState(MoltenProtModel):
+    short_name = "irrev"
+    _description = "N -> U"
+    sortby = "pk_std"  # type: ignore[assignment]
+    xn = 1  # y0 (starting condition) for differential equation
+
+    def __init__(self, scan_rate) -> None:
+        # scan rate is an essential parameter and must thus be set explicitly
+        if scan_rate is not None:
+            self.scan_rate = scan_rate
+        else:
+            msg = f"{self.short_name} model requires scan_rate to be set"
+            raise ValueError(
+                msg,
+            )
+
+    def arrhenius(self, t, Tf, Ea):
+        """Arrhenius equation: defines dependence of reaction rate constant k on temperature
+        In this version of the equation we use Tf (the temperature at which k=1)
+        instead of the pre-exponential constant A.
+        """
+        return np.exp(-Ea / R * (1 / t - 1 / Tf))
+
+    def ode(self, t, xn, Tf, Ea):
+        """Ordinary differential equation for fraction native versus temperature
+        dxn/dT = -1/v*k(T)*xn.
+
+        start_value xn - should be always 1 because at the start of assay we assume everything is folded
+        v - scan rate (self.scan_rate, required by __init__) to convert minutes of scan rate to degrees
+        xn - fraction native (xn + xagg = 1)
+        k(T) - temperature-dependent rate constant of aggregation
+        """
+        return -1 / self.scan_rate * self.arrhenius(t, Tf, Ea) * xn
+
+    def fun(self, t, kN, bN, kU, bU, Tf, Ea):
+        """Returns aggregation signal at given temperature
+        Signal(T) = (kN*T + bN)*xn +(kU*T + bU)*xu
+        k, b - baseline parameters (N or U state)
+        xn, xu - fraction native/unfolded, xn + xu = 1
+        in other words:
+        Signal(T) = kU*T + bU + (kN*T + bN - kU*T - bU) * xn.
+        """
+        # step 1: numerically integrate self.ode for given parameters - gives xn(T)
+        ivp_result = solve_ivp(
+            self.ode,
+            t_span=[min(t), max(t)],
+            t_eval=t,
+            y0=[self.xn],
+            args=(Tf, Ea),
+            method="BDF",
+        )
+
+        # step 2: return the result of the signal
+        return kU * t + bU + (kN * t + bN - kU * t - bU) * ivp_result.y[0, :]
+
+    def param_init(self, input_data=None):
+        if input_data is None:
+            # without input data it's hard to guess starting values
+            # but it seems that starting with a high Tf may help
+            # kN, bN, kU, bU, Tf, Ea
+            return (0, 1, 0, 1, 400, 100000)
+        else:
+            # the baselines will have a better guess in MoltenProt
+            # since Tf may or may not coincide with the derivative peak
+            # it is taken as the middle of the temp range
+            return (
+                0,
+                1,
+                0,
+                1,
+                min(input_data.index)
+                + (max(input_data.index) - min(input_data.index)) / 2.0,
+                50000,
+            )
+
+    def param_bounds(self, input_data=None):
+        # NOTE it may happen that Tf is outside the temperature range, but the curve is still OK
+        # also, MoltenProt calculations are always done in Kelvins
+        # thus, the default bounds are quite relaxed (Tf within 100 degrees of the data range, Ea >= 0)
+        if input_data is None:
+            return (
+                (-np.inf, -np.inf, -np.inf, -np.inf, 1, 0),
+                (np.inf, np.inf, np.inf, np.inf, np.inf, np.inf),
+            )
+        else:
+            return (
+                (-np.inf, -np.inf, -np.inf, -np.inf, min(input_data.index) - 100, 0),
+                (np.inf, np.inf, np.inf, np.inf, max(input_data.index) + 100, np.inf),
+            )
+
+
+"""
+NOTE given the current computational workload of even a simple kinetic equation, implementing more complex
+equations makes little sense
+
+class IrreversibleThreeState(IrreversibleTwoState):
+    short_name = 'irrev'
+    _description = "N -> I -> U"
+"""
+
+# TODO another (potentially useful) variant could be to implement N <-kF,kR-> U without any A
+# the class can be called ReversibleTwoState, because there are only U and N, and U can sometimes become N
+
+
+class LumryEyring(IrreversibleTwoState):
+    short_name = "lumry_eyring"
+    _description = "N <- kF,kR -> U -> A"
+
+    # the kF/kR at std temperature; take as -log10 to have higher values for higher stability
+    sortby = "pk_ratio_std"  # type: ignore[assignment]
+
+    # fmt: off
+    #        ~_~
+    z0 =    (0,0)     # starting values for system of 2x ode
+    #      /,www,\
+    #      ||wWw||    an owl?!
+    #       \|||/
+    #     ~~~m'm~~~
+    # fmt: on
+    def __init__(self, scan_rate, tfea=None) -> None:
+        """Set the scan rate (mandatory) and the fixed [Tf, Ea] values."""
+        # scan rate is an essential parameter and must thus be set explicitly
+        if scan_rate is None:
+            msg = f"{self.short_name} model requires scan_rate to be set"
+            raise ValueError(msg)
+        self.scan_rate = scan_rate
+
+        # tfea are characteristics of the irreversible aggregation reaction (Tf and Ea)
+        # and are obtained separately; they are fixed during fitting
+        # NOTE the placeholder list is created per instance - a list literal in the
+        # signature would be a single object shared by every instance
+        self.tfea = [None, None] if tfea is None else tfea
+
+    def set_fixed(self, tfea):
+        """Set the parameters that the fit equation needs but that are not fitted.
+        tfea must be a list with two values [Tf, Ea] (floats); ValueError if len(tfea) != 2
+        NOTE only the length of the input is checked, but not the type of list elements.
+        """
+        if len(tfea) != 2:
+            msg = "LE model requires tfea to be a list of two floats"
+            raise ValueError(msg)
+        self.tfea = tfea
+
+    def ode(
+        self,
+        t,
+        z0,
+        TfF,
+        EaF,
+        TfR,
+        EaR,
+        Tf2,
+        Ea2,
+    ):
+        """A function to process a system of differential equations x and y packaged into array z = [x,y]
+        Implements a general case of Lumry-Eyring equation (case D in Mazurenko2017)
+        N <- kF, kR -> U - k2 -> A
+        there is a quasi-equilibrium between N and U, but irreversible conversion to A
+        As described in the paper:
+
+        fN + fU + fA = 1
+        dfAdT = 1/v * k2(T) * fU(T) ...................................... (eq. y)
+        dfUdT = 1/v* ( kF(T)*fN - (kR(T)+k2(T))*fU )
+
+        since fN = 1 - fU - fA, can rewrite dfUdT like this:
+
+        dfUdT = 1/v * ( kF(T) * (1 - fU - fA) - ( kR(T)+k2(T) )*fU ) ..... (eq. x)
+
+        the formula for rate in reaction i would then be:
+        ki(T) = exp(-Eai/R * (1/T - 1/Tfi))
+        i can be F or R for N<->U and 2 for U->A
+
+        t - temperature scale (converted to kinetic time using v, the scan rate, i.e. self.scan_rate)
+        z0 - values of the two unknowns [fU, fA]; the integration is started from [fU=0, fA=0]
+        """
+        # unpack the two fractions: x0 is fU (eq. x), y0 is fA (eq. y)
+        x0, y0 = z0
+
+        dxdt = (
+            1
+            / self.scan_rate
+            * (
+                self.arrhenius(t, TfF, EaF) * (1 - x0 - y0)
+                - (self.arrhenius(t, TfR, EaR) + self.arrhenius(t, Tf2, Ea2)) * x0
+            )
+        )
+        dydt = 1 / self.scan_rate * self.arrhenius(t, Tf2, Ea2) * x0
+
+        return (dxdt, dydt)
+
+    # def fun(self, T, TfF, EaF, TfR, EaR, kNF, bNF, kUF, kAF, bAF):
+    def fun(self, T, kN, bN, kU, bU, kI, TfF, EaF, TfR, EaR):
+        """Uses pre-computed Tf and Ea for scattering data to model fluorescence signal
+        NOTE the original order and naming of the parameters is as follows:
+        T, TfF, EaF, TfR, EaR, kNF, bNF, kUF, kAF, bAF
+        T - temperature/time
+        TfF - temperature at which the rate constant kF=1 (reaction N->U)
+        EaF - activation energy for N->U
+        TfR,EaR - Tf and Ea for reaction U->N
+        kNF, bNF - slope and intercept for the baseline of state N
+        kUF - slope for the fluorescence of state U (assumed to be short-lived and not abundant, see Bedouelle2016)
+        kAF, bAF - slope/intercept for the baseline of state A, which ultimately makes up the post-transition baseline.
+
+        For MoltenProt to recognize pre- and post- baseline parameters they have to be placed first
+        and renamed as follows:
+        kNF, bNF - kN, bN
+        kAF, bAF - kU, bU
+        kUF - kI (similar to three-state cases above)
+
+        NOTE this is not the only way to define the law of signal; for instance, we can assume
+        that the fluorescence has a similar time dependence for states U and A; then kI (aka kUF) is not needed
+        and the law of signal will be:
+        (kN * T + bN) * fN + (kU*T + bU) * (fA + fU)
+        """
+        ivp_result = solve_ivp(
+            self.ode,  # the fixed self.tfea pair fills ode's Tf2, Ea2
+            t_span=[min(T), max(T)],
+            y0=self.z0,
+            args=(TfF, EaF, TfR, EaR, self.tfea[0], self.tfea[1]),
+            t_eval=T,
+            method="BDF",
+        )
+
+        # based on diff eqn compute fractions of each state
+        fU = ivp_result.y[0, :]
+        fA = ivp_result.y[1, :]
+        fN = 1 - fU - fA
+        # modelled signal; NOTE(review): kI is applied without T although documented as a slope
+        return (kN * T + bN) * fN + kI * fU + (kU * T + bU) * fA
+
+    def param_init(self, input_data=None):
+        if input_data is None:
+            # without input data it's hard to guess starting values
+            # order matches fun(): kN, bN, kU, bU, kI, TfF, EaF, TfR, EaR
+            return (0, 1, 0, 1, 0, 400, 100000, 400, 100000)
+        else:
+            # the baselines will have a better guess in MoltenProt
+            # Tf is probably not similar to Tm, however, it makes sense to try the middle of the range
+            temp_range = max(input_data.index) - min(input_data.index)
+            return (
+                0,  # kN
+                1,  # bN
+                0,  # kU
+                1,  # bU
+                0,  # kI
+                min(input_data.index) + temp_range / 2.0,  # TfF
+                50000,  # EaF
+                min(input_data.index) + temp_range / 2.0,  # TfR
+                50000,  # EaR
+            )
+
+    def param_bounds(self, input_data=None):
+        # NOTE it may happen that Tf is outside the temperature range, but the curve is still OK
+        # also, MoltenProt calculations are always done in Kelvins
+        # thus, the default bounds are quite relaxed (without data a single np.inf is the upper bound)
+        # NOTE Tf should not be zero, see the Arrhenius formula
+        if input_data is None:
+            return ((-np.inf, -np.inf, -np.inf, -np.inf, -np.inf, 1, 0, 1, 0), np.inf)
+        else:
+            return (
+                (  # lower bounds
+                    -np.inf,  # kN
+                    -np.inf,  # bN
+                    -np.inf,  # kU
+                    -np.inf,  # bU
+                    -np.inf,  # kI
+                    min(input_data.index) - 100,  # TfF
+                    0,  # EaF
+                    min(input_data.index) - 100,  # TfR
+                    0,  # EaR
+                ),
+                (  # upper bounds
+                    np.inf,  # kN
+                    np.inf,  # bN
+                    np.inf,  # kU
+                    np.inf,  # bU
+                    np.inf,  # kI
+                    max(input_data.index) + 100,  # TfF
+                    np.inf,  # EaF
+                    max(input_data.index) + 100,  # TfR
+                    np.inf,  # EaR
+                ),
+            )

From 08097f3a134f907a236170883c1f6f35028b73ed Mon Sep 17 00:00:00 2001
From: Najib Ishaq <najib_ishaq@zoho.com>
Date: Mon, 6 May 2024 07:51:21 -0400
Subject: [PATCH 02/18] chore: added ict.yml file

---
 .../rt-cetsa-moltprot-tool/.bumpversion.cfg   |  2 +
 regression/rt-cetsa-moltprot-tool/README.md   | 12 ++---
 regression/rt-cetsa-moltprot-tool/ict.yml     | 51 +++++++++++++++++++
 regression/rt-cetsa-moltprot-tool/plugin.json |  8 +--
 .../rt-cetsa-moltprot-tool/run-plugin.sh      |  4 +-
 5 files changed, 65 insertions(+), 12 deletions(-)
 create mode 100644 regression/rt-cetsa-moltprot-tool/ict.yml

diff --git a/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg b/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
index d858009..5c3e4be 100644
--- a/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
+++ b/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
@@ -27,3 +27,5 @@ replace = version = "{new_version}"
 [bumpversion:file:README.md]
 
 [bumpversion:file:src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py]
+
+[bumpversion:file:ict.yml]
diff --git a/regression/rt-cetsa-moltprot-tool/README.md b/regression/rt-cetsa-moltprot-tool/README.md
index 24a28c6..3d85f45 100644
--- a/regression/rt-cetsa-moltprot-tool/README.md
+++ b/regression/rt-cetsa-moltprot-tool/README.md
@@ -16,9 +16,9 @@ If WIPP is running, navigate to the plugins page and add a new plugin. Paste the
 
 This plugin takes eight input argument and one output argument:
 
-| Name               | Description                                                | I/O    | Type          |
-|--------------------|------------------------------------------------------------|--------|---------------|
-| `--inpDir`         | Input data collection to be processed by this plugin       | Input  | genericData   |
-| `--filePattern`    | Pattern to parse tabular files                             | Input  | string        |
-| `--outDir`         | Output file                                                | Output | genericData   |
-| `--preview`        | Generate JSON file with outputs                            | Output | JSON          |
+| Name        | Description                                        | I/O    | Type        |
+|-------------|----------------------------------------------------|--------|-------------|
+| `--inpDir`  | Input data collection to be processed by this tool | Input  | genericData |
+| `--pattern` | Pattern to parse input files                       | Input  | string      |
+| `--outDir`  | Output file                                        | Output | genericData |
+| `--preview` | Generate JSON file with outputs                    | Output | JSON        |
diff --git a/regression/rt-cetsa-moltprot-tool/ict.yml b/regression/rt-cetsa-moltprot-tool/ict.yml
new file mode 100644
index 0000000..bf5c3fe
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/ict.yml
@@ -0,0 +1,51 @@
+author:
+- Nick Schaub
+- Najib Ishaq
+contact: nick.schaub@nih.gov
+container: polusai/rt-cetsa-moltprot-tool:0.1.0
+description: Run regression analysis for the RT-CETSA pipeline.
+entrypoint: python3 -m polus.tabular.regression.rt_cetsa_moltprot
+inputs:
+- description: Input data collection to be processed by this tool
+  format:
+  - inpDir
+  name: inpDir
+  required: true
+  type: path
+- description: Filepattern to parse input files
+  format:
+  - pattern
+  name: pattern
+  required: false
+  type: string
+- description: Generate an output preview.
+  format:
+  - preview
+  name: preview
+  required: false
+  type: boolean
+name: polusai/RTCETSAMoltprot
+outputs:
+- description: Output collection
+  format:
+  - outDir
+  name: outDir
+  required: true
+  type: path
+repository: https://github.com/PolusAI/tabular-tools
+specVersion: 1.0.0
+title: RT-CETSA Moltprot
+ui:
+- description: Input data collection
+  key: inputs.inpDir
+  title: Input data collection
+  type: path
+- description: Filepattern to parse input files
+  key: inputs.pattern
+  title: pattern
+  type: text
+- description: Generate an output preview.
+  key: inputs.preview
+  title: Preview example output of this plugin
+  type: checkbox
+version: 0.1.0
diff --git a/regression/rt-cetsa-moltprot-tool/plugin.json b/regression/rt-cetsa-moltprot-tool/plugin.json
index e48b1d0..fc1f91d 100644
--- a/regression/rt-cetsa-moltprot-tool/plugin.json
+++ b/regression/rt-cetsa-moltprot-tool/plugin.json
@@ -18,11 +18,11 @@
     {
       "name": "inpDir",
       "type": "genericData",
-      "description": "Input data collection to be processed by this plugin",
+      "description": "Input data collection to be processed by this tool",
       "required": true
     },
     {
-      "name": "filePattern",
+      "name": "pattern",
       "type": "string",
       "description": "Pattern to parse input files",
       "default": ".+",
@@ -43,8 +43,8 @@
       "description": "Input data collection to be processed by this plugin"
     },
     {
-      "key": "inputs.filePattern",
-      "title": "filePattern",
+      "key": "inputs.pattern",
+      "title": "pattern",
       "description": "Pattern to parse input files",
       "default": ".+"
     }
diff --git a/regression/rt-cetsa-moltprot-tool/run-plugin.sh b/regression/rt-cetsa-moltprot-tool/run-plugin.sh
index bea8c33..97d7005 100644
--- a/regression/rt-cetsa-moltprot-tool/run-plugin.sh
+++ b/regression/rt-cetsa-moltprot-tool/run-plugin.sh
@@ -4,7 +4,7 @@ datapath=$(readlink --canonicalize data)
 
 # Inputs
 inpDir=/data/input
-filePattern=".*"
+pattern=".*"
 
 # Output paths
 outDir=/data/output
@@ -16,5 +16,5 @@ docker run --mount type=bind,source=${datapath},target=/data/  \
             --env POLUS_LOG=${LOGLEVEL} \
             polusai/rt-cetsa-moltprot-tool:${version} \
             --inpDir ${inpDir} \
-            --filePattern ${filePattern} \
+            --pattern ${pattern} \
             --outDir ${outDir}

From ec94a098eab4d766a57dc7b7971c8ba23e850a56 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Mon, 13 May 2024 11:02:59 -0400
Subject: [PATCH 03/18] fix: moltenprot tool.

---
 regression/rt-cetsa-moltprot-tool/README.md   | 12 ++---
 regression/rt-cetsa-moltprot-tool/plugin.json | 10 ++--
 .../regression/rt_cetsa_moltprot/__init__.py  |  8 +--
 .../regression/rt_cetsa_moltprot/__main__.py  | 49 ++++++++++++++++---
 4 files changed, 55 insertions(+), 24 deletions(-)

diff --git a/regression/rt-cetsa-moltprot-tool/README.md b/regression/rt-cetsa-moltprot-tool/README.md
index 3d85f45..7038d99 100644
--- a/regression/rt-cetsa-moltprot-tool/README.md
+++ b/regression/rt-cetsa-moltprot-tool/README.md
@@ -16,9 +16,9 @@ If WIPP is running, navigate to the plugins page and add a new plugin. Paste the
 
 This plugin takes eight input argument and one output argument:
 
-| Name        | Description                                        | I/O    | Type        |
-|-------------|----------------------------------------------------|--------|-------------|
-| `--inpDir`  | Input data collection to be processed by this tool | Input  | genericData |
-| `--pattern` | Pattern to parse input files                       | Input  | string      |
-| `--outDir`  | Output file                                        | Output | genericData |
-| `--preview` | Generate JSON file with outputs                    | Output | JSON        |
+| Name            | Description                                        | I/O    | Type        |
+|-----------------|----------------------------------------------------|--------|-------------|
+| `--inpDir`      | Input data collection to be processed by this tool | Input  | genericData |
+| `--filePattern` | File Pattern to parse input files                  | Input  | string      |
+| `--outDir`      | Output file                                        | Output | genericData |
+| `--preview`     | Generate JSON file with outputs                    | Output | JSON        |
diff --git a/regression/rt-cetsa-moltprot-tool/plugin.json b/regression/rt-cetsa-moltprot-tool/plugin.json
index fc1f91d..92dcd14 100644
--- a/regression/rt-cetsa-moltprot-tool/plugin.json
+++ b/regression/rt-cetsa-moltprot-tool/plugin.json
@@ -22,9 +22,9 @@
       "required": true
     },
     {
-      "name": "pattern",
+      "name": "filePattern",
       "type": "string",
-      "description": "Pattern to parse input files",
+      "description": "File Pattern to parse input files",
       "default": ".+",
       "required": false
     }
@@ -43,9 +43,9 @@
       "description": "Input data collection to be processed by this plugin"
     },
     {
-      "key": "inputs.pattern",
-      "title": "pattern",
-      "description": "Pattern to parse input files",
+      "key": "inputs.filePattern",
+      "title": "File Pattern",
+      "description": "File Pattern to parse input files",
       "default": ".+"
     }
   ]
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
index 6d3ec01..b7d7e48 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
@@ -39,10 +39,4 @@ def fit_data(file_path: pathlib.Path) -> pandas.DataFrame:
     fit.PrepareData()
     fit.ProcessData()
 
-    return fit.plate_results.sort_values("BS_factor")
-
-
-def gen_out_path(file_path: pathlib.Path, out_dir: pathlib.Path) -> pathlib.Path:
-    """Generate the output path."""
-    file_name = file_path.stem + "_moltprot" + POLUS_TAB_EXT
-    return out_dir / file_name
+    return fit
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
index 3c43a9f..4a36b08 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
@@ -8,7 +8,6 @@
 import filepattern
 import typer
 from polus.tabular.regression.rt_cetsa_moltprot import fit_data
-from polus.tabular.regression.rt_cetsa_moltprot import gen_out_path
 
 # Initialize the logger
 logging.basicConfig(
@@ -18,6 +17,8 @@
 logger = logging.getLogger("polus.tabular.transforms.tabular_merger")
 logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))
 
+POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".csv")
+
 app = typer.Typer()
 
 
@@ -25,6 +26,7 @@
 def main(
     inp_dir: pathlib.Path = typer.Option(
         ...,
+        "--inpDir",
         help="Input directory containing the data files.",
         exists=True,
         dir_okay=True,
@@ -33,14 +35,17 @@ def main(
     ),
     pattern: str = typer.Option(
         ".+",
+        "--filePattern",
         help="Pattern to match the files in the input directory.",
     ),
     preview: bool = typer.Option(
         False,
+        "--preview",
         help="Preview the files that will be processed.",
     ),
     out_dir: pathlib.Path = typer.Option(
         ...,
+        "--outDir",
         help="Output directory to save the results.",
         exists=True,
         dir_okay=True,
@@ -56,20 +61,52 @@ def main(
     logger.info(f"File Pattern: {pattern}")
     logger.info(f"Output directory: {out_dir}")
 
+    if POLUS_TAB_EXT != ".csv":
+        msg = "this tool can currently only process csv files."
+        raise ValueError(msg)
+
     fp = filepattern.FilePattern(inp_dir, pattern)
     inp_files = [f[1][0] for f in fp()]
 
+    for f in inp_files:
+        if not f.suffix == POLUS_TAB_EXT:
+            raise ValueError(
+                f"this tool can only process {POLUS_TAB_EXT} files. Got {f}",
+            )
+
     if preview:
-        out_json = {"files": [gen_out_path(f, out_dir) for f in inp_files]}
+        outputs: list[str] = []
+        for f in inp_files:
+            fit_params_path = f.stem + "_moltenprot_params" + POLUS_TAB_EXT
+            fit_curves_path = f.stem + "_moltenprot_curves" + POLUS_TAB_EXT
+            outputs = [*outputs, fit_params_path, fit_curves_path]
+        out_json = {"files": outputs}
         with (out_dir / "preview.json").open("w") as f:
             json.dump(out_json, f, indent=2)
         return
 
     for f in inp_files:
-        logger.info(f"Processing file: {f}")
-        out_path = gen_out_path(f, out_dir)
-        df = fit_data(f)
-        df.to_csv(out_path, index=True)
+        logger.info(f"Processing plate timeserie: {f}")
+        fit = fit_data(f)
+        fit_params_path = out_dir / (f.stem + "_moltenprot_params" + POLUS_TAB_EXT)
+        fit_curves_path = out_dir / (f.stem + "_moltenprot_curves" + POLUS_TAB_EXT)
+
+        # sort fit_params by row/column
+        fit_params = fit.plate_results
+        fit_params["_index"] = fit_params.index
+        fit_params["letter"] = fit_params.apply(lambda row: row._index[:1], axis=1)
+        fit_params["number"] = fit_params.apply(
+            lambda row: row._index[1:],
+            axis=1,
+        ).astype(int)
+        fit_params = fit_params.drop(columns="_index")
+        fit_params = fit_params.sort_values(["letter", "number"])
+        fit_params.to_csv(fit_params_path, index=True)
+
+        # round the temperature index to 2 decimal places
+        fit_curves = fit.plate_raw_corr
+        fit_curves.index = fit_curves.index.map(lambda t: round(t, 2))
+        fit_curves.to_csv(fit_curves_path, index=True)
 
 
 if __name__ == "__main__":

From c0a0bad2a76742aada39cc8c18880a64752e3f85 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 14 May 2024 03:16:03 -0400
Subject: [PATCH 04/18] feat: containerized and cwl-able

---
 .../rt-cetsa-moltprot-tool/.bumpversion.cfg   | 12 ++++---
 regression/rt-cetsa-moltprot-tool/Dockerfile  |  6 ++--
 regression/rt-cetsa-moltprot-tool/README.md   |  5 ++-
 .../rt-cetsa-moltprot-tool/build-docker.sh    |  4 +++
 regression/rt-cetsa-moltprot-tool/ict.yml     |  3 +-
 regression/rt-cetsa-moltprot-tool/plugin.json |  6 ++--
 .../rt-cetsa-moltprot-tool/pyproject.toml     |  3 +-
 .../rt_cetsa_moltenprot.cwl                   | 32 +++++++++++++++++++
 .../rt-cetsa-moltprot-tool/run-plugin.sh      |  6 ++--
 .../{VERSION => RT_CETSA_VERSION}             |  0
 .../regression/rt_cetsa_moltprot/__init__.py  |  5 ++-
 .../regression/rt_cetsa_moltprot/__main__.py  |  2 +-
 .../regression/rt_cetsa_moltprot/core.py      |  2 +-
 13 files changed, 64 insertions(+), 22 deletions(-)
 create mode 100755 regression/rt-cetsa-moltprot-tool/build-docker.sh
 create mode 100644 regression/rt-cetsa-moltprot-tool/rt_cetsa_moltenprot.cwl
 mode change 100644 => 100755 regression/rt-cetsa-moltprot-tool/run-plugin.sh
 rename regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/{VERSION => RT_CETSA_VERSION} (100%)

diff --git a/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg b/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
index 5c3e4be..b3d8c10 100644
--- a/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
+++ b/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
@@ -16,16 +16,18 @@ values =
 
 [bumpversion:part:dev]
 
+[bumpversion:file:VERSION]
+
 [bumpversion:file:pyproject.toml]
 search = version = "{current_version}"
 replace = version = "{new_version}"
 
-[bumpversion:file:plugin.json]
-
-[bumpversion:file:VERSION]
-
 [bumpversion:file:README.md]
 
-[bumpversion:file:src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py]
+[bumpversion:file:src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py]
+
+[bumpversion:file:plugin.json]
 
 [bumpversion:file:ict.yml]
+
+[bumpversion:file:rt_cetsa_moltenprot.cwl]
diff --git a/regression/rt-cetsa-moltprot-tool/Dockerfile b/regression/rt-cetsa-moltprot-tool/Dockerfile
index dd390da..db91d4c 100755
--- a/regression/rt-cetsa-moltprot-tool/Dockerfile
+++ b/regression/rt-cetsa-moltprot-tool/Dockerfile
@@ -1,9 +1,9 @@
-FROM polusai/bfio:2.3.6
+FROM polusai/bfio:2.1.9
 
 # environment variables defined in polusai/bfio
 ENV EXEC_DIR="/opt/executables"
 ENV POLUS_IMG_EXT=".ome.tif"
-ENV POLUS_TAB_EXT=".arrow"
+ENV POLUS_TAB_EXT=".csv"
 ENV POLUS_LOG="INFO"
 
 # Work directory defined in the base container
@@ -16,5 +16,5 @@ COPY src ${EXEC_DIR}/src
 
 RUN pip3 install ${EXEC_DIR} --no-cache-dir
 
-ENTRYPOINT ["python3", "-m", "polus.tabular.regression.rt_cetsa_moltprot"]
+ENTRYPOINT ["python3", "-m", "polus.tabular.regression.rt_cetsa_moltenprot"]
 CMD ["--help"]
diff --git a/regression/rt-cetsa-moltprot-tool/README.md b/regression/rt-cetsa-moltprot-tool/README.md
index 7038d99..50ad929 100644
--- a/regression/rt-cetsa-moltprot-tool/README.md
+++ b/regression/rt-cetsa-moltprot-tool/README.md
@@ -1,7 +1,6 @@
-# RT_CETSA Moltprot Regression (v0.1.0)
+# RT_CETSA MoltenProt (v0.1.0)
 
-This WIPP plugin runs regression analysis for the RT-CETSA pipeline.
-The input csv file should be sorted by `Temperature` column.
+This WIPP plugin runs moltenprot regression for the RT-CETSA pipeline.
 
 ## Building
 
diff --git a/regression/rt-cetsa-moltprot-tool/build-docker.sh b/regression/rt-cetsa-moltprot-tool/build-docker.sh
new file mode 100755
index 0000000..80539db
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/build-docker.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+version=$(<VERSION)
+docker build . -t polusai/rt-cetsa-moltenprot-tool:"${version}"
diff --git a/regression/rt-cetsa-moltprot-tool/ict.yml b/regression/rt-cetsa-moltprot-tool/ict.yml
index bf5c3fe..da600de 100644
--- a/regression/rt-cetsa-moltprot-tool/ict.yml
+++ b/regression/rt-cetsa-moltprot-tool/ict.yml
@@ -1,9 +1,10 @@
 author:
 - Nick Schaub
+- Antoine Gerardin
 - Najib Ishaq
 contact: nick.schaub@nih.gov
 container: polusai/rt-cetsa-moltprot-tool:0.1.0
-description: Run regression analysis for the RT-CETSA pipeline.
+description: Run moltenprot regression for the RT-CETSA pipeline.
 entrypoint: python3 -m polus.tabular.regression.rt_cetsa_moltprot
 inputs:
 - description: Input data collection to be processed by this tool
diff --git a/regression/rt-cetsa-moltprot-tool/plugin.json b/regression/rt-cetsa-moltprot-tool/plugin.json
index 92dcd14..63877b2 100644
--- a/regression/rt-cetsa-moltprot-tool/plugin.json
+++ b/regression/rt-cetsa-moltprot-tool/plugin.json
@@ -2,8 +2,8 @@
   "name": "RT-CETSA Moltprot",
   "version": "0.1.0",
   "title": "RT-CETSA Moltprot",
-  "description": "Run regression analysis for the RT-CETSA pipeline.",
-  "author": "Nicholas Schaub (nick.schaub@nih.gov), Najib Ishaq (najib.ishaq@nih.gov)",
+  "description": "Run moltenprot for the RT-CETSA pipeline.",
+  "author": "Nicholas Schaub (nick.schaub@nih.gov), Antoine Gerardin (antoine.gerardin@nih.gov), Najib Ishaq (najib.ishaq@nih.gov)",
   "institution": "National Center for Advancing Translational Sciences, National Institutes of Health",
   "repository": "https://github.com/PolusAI/tabular-tools",
   "website": "https://ncats.nih.gov/preclinical/core/informatics",
@@ -12,7 +12,7 @@
   "baseCommand": [
     "python3",
     "-m",
-    "polus.tabular.regression.rt_cetsa_moltprot"
+    "polus.tabular.regression.rt_cetsa_moltenprot"
   ],
   "inputs": [
     {
diff --git a/regression/rt-cetsa-moltprot-tool/pyproject.toml b/regression/rt-cetsa-moltprot-tool/pyproject.toml
index 8627e09..aa90b1f 100644
--- a/regression/rt-cetsa-moltprot-tool/pyproject.toml
+++ b/regression/rt-cetsa-moltprot-tool/pyproject.toml
@@ -1,9 +1,10 @@
 [tool.poetry]
 name = "polus_tabular_regression_rt_cetsa_moltprot"
 version = "0.1.0"
-description = "Run regression analysis for the RT-CETSA pipeline."
+description = "Run moltenprot for the RT-CETSA pipeline."
 authors = [
     "Nick Schaub <nick.schaub@nih.gov>",
+    "Antoine Gerardin <antoine.gerardin@nih.gov>",
     "Najib Ishaq <najib.ishaq@nih.gov>",
 ]
 readme = "README.md"
diff --git a/regression/rt-cetsa-moltprot-tool/rt_cetsa_moltenprot.cwl b/regression/rt-cetsa-moltprot-tool/rt_cetsa_moltenprot.cwl
new file mode 100644
index 0000000..ee45fd6
--- /dev/null
+++ b/regression/rt-cetsa-moltprot-tool/rt_cetsa_moltenprot.cwl
@@ -0,0 +1,32 @@
+class: CommandLineTool
+cwlVersion: v1.2
+inputs:
+  inpDir:
+    inputBinding:
+      prefix: --inpDir
+    type: Directory
+  filePattern:
+    inputBinding:
+      prefix: --filePattern
+    type: string?
+  preview:
+    inputBinding:
+      prefix: --preview
+    type: boolean?
+  outDir:
+    inputBinding:
+      prefix: --outDir
+    type: Directory
+outputs:
+  outDir:
+    outputBinding:
+      glob: $(inputs.outDir.basename)
+    type: Directory
+requirements:
+  DockerRequirement:
+    dockerPull: polusai/rt-cetsa-moltenprot-tool:0.1.0
+  InitialWorkDirRequirement:
+    listing:
+    - entry: $(inputs.outDir)
+      writable: true
+  InlineJavascriptRequirement: {}
diff --git a/regression/rt-cetsa-moltprot-tool/run-plugin.sh b/regression/rt-cetsa-moltprot-tool/run-plugin.sh
old mode 100644
new mode 100755
index 97d7005..73c9245
--- a/regression/rt-cetsa-moltprot-tool/run-plugin.sh
+++ b/regression/rt-cetsa-moltprot-tool/run-plugin.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 version=$(<VERSION)
-datapath=$(readlink --canonicalize data)
+datapath=$(readlink -f data)
 
 # Inputs
 inpDir=/data/input
@@ -14,7 +14,7 @@ LOGLEVEL=INFO
 
 docker run --mount type=bind,source=${datapath},target=/data/  \
             --env POLUS_LOG=${LOGLEVEL} \
-            polusai/rt-cetsa-moltprot-tool:${version} \
+            polusai/rt-cetsa-moltenprot-tool:${version} \
             --inpDir ${inpDir} \
-            --pattern ${pattern} \
+            --filePattern ${pattern} \
             --outDir ${outDir}
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/VERSION b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/RT_CETSA_VERSION
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/VERSION
rename to regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/RT_CETSA_VERSION
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
index b7d7e48..62b4e6c 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
@@ -4,11 +4,14 @@
 
 import os
 import pathlib
+import warnings
 
 import pandas
 
 from . import core
-from . import models
+
+# Suppress FutureWarning messages coming from pandas
+warnings.simplefilter(action="ignore", category=FutureWarning)
 
 POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".csv")
 
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
index 4a36b08..569bf40 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
@@ -14,7 +14,7 @@
     format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
     datefmt="%d-%b-%y %H:%M:%S",
 )
-logger = logging.getLogger("polus.tabular.transforms.tabular_merger")
+logger = logging.getLogger(__file__)
 logger.setLevel(os.environ.get("POLUS_LOG", logging.INFO))
 
 POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".csv")
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
index 31ba0f1..e1160e5 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
+++ b/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
@@ -82,7 +82,7 @@
 
 # MoltenProt is stored in a plain-text file VERSION (also used by setup.py)
 # extract and save it to a variable
-with open(os.path.join(__location__, "VERSION")) as version_file:
+with open(os.path.join(__location__, "RT_CETSA_VERSION")) as version_file:
     __version__ = version_file.read().strip()
 
 # get scipy version (some methods may not be available in earlier versions)

From d1815fa89718fa6e900c079be2d107e7d3404f93 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 14 May 2024 05:57:29 -0400
Subject: [PATCH 05/18] feat: add rt_cetsa analysis.

---
 .../rt-cetsa-analysis-tool/.bumpversion.cfg   | 33 ++++++++
 regression/rt-cetsa-analysis-tool/Dockerfile  | 20 +++++
 regression/rt-cetsa-analysis-tool/README.md   | 23 ++++++
 regression/rt-cetsa-analysis-tool/VERSION     |  1 +
 .../rt-cetsa-analysis-tool/build-docker.sh    |  4 +
 regression/rt-cetsa-analysis-tool/ict.yml     | 52 +++++++++++++
 regression/rt-cetsa-analysis-tool/plugin.json | 52 +++++++++++++
 .../rt-cetsa-analysis-tool/pyproject.toml     | 77 +++++++++++++++++++
 .../rt_cetsa_analysis.cwl                     | 32 ++++++++
 .../rt-cetsa-analysis-tool/run-plugin.sh      | 20 +++++
 10 files changed, 314 insertions(+)
 create mode 100644 regression/rt-cetsa-analysis-tool/.bumpversion.cfg
 create mode 100755 regression/rt-cetsa-analysis-tool/Dockerfile
 create mode 100644 regression/rt-cetsa-analysis-tool/README.md
 create mode 100644 regression/rt-cetsa-analysis-tool/VERSION
 create mode 100755 regression/rt-cetsa-analysis-tool/build-docker.sh
 create mode 100644 regression/rt-cetsa-analysis-tool/ict.yml
 create mode 100644 regression/rt-cetsa-analysis-tool/plugin.json
 create mode 100644 regression/rt-cetsa-analysis-tool/pyproject.toml
 create mode 100644 regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
 create mode 100755 regression/rt-cetsa-analysis-tool/run-plugin.sh

diff --git a/regression/rt-cetsa-analysis-tool/.bumpversion.cfg b/regression/rt-cetsa-analysis-tool/.bumpversion.cfg
new file mode 100644
index 0000000..3bc5c4c
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/.bumpversion.cfg
@@ -0,0 +1,33 @@
+[bumpversion]
+current_version = 0.1.0
+commit = True
+tag = False
+parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
+serialize = 
+	{major}.{minor}.{patch}-{release}{dev}
+	{major}.{minor}.{patch}
+
+[bumpversion:part:release]
+optional_value = _
+first_value = dev
+values = 
+	dev
+	_
+
+[bumpversion:part:dev]
+
+[bumpversion:file:VERSION]
+
+[bumpversion:file:pyproject.toml]
+search = version = "{current_version}"
+replace = version = "{new_version}"
+
+[bumpversion:file:README.md]
+
+[bumpversion:file:src/polus/tabular/regression/rt_cetsa_analysis/__init__.py]
+
+[bumpversion:file:plugin.json]
+
+[bumpversion:file:ict.yml]
+
+[bumpversion:file:rt_cetsa_analysis.cwl]
diff --git a/regression/rt-cetsa-analysis-tool/Dockerfile b/regression/rt-cetsa-analysis-tool/Dockerfile
new file mode 100755
index 0000000..0e29286
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/Dockerfile
@@ -0,0 +1,20 @@
+FROM polusai/bfio:2.1.9
+
+# environment variables defined in polusai/bfio
+ENV EXEC_DIR="/opt/executables"
+ENV POLUS_IMG_EXT=".ome.tif"
+ENV POLUS_TAB_EXT=".csv"
+ENV POLUS_LOG="INFO"
+
+# Work directory defined in the base container
+WORKDIR ${EXEC_DIR}
+
+COPY pyproject.toml ${EXEC_DIR}
+COPY VERSION ${EXEC_DIR}
+COPY README.md ${EXEC_DIR}
+COPY src ${EXEC_DIR}/src
+
+RUN pip3 install ${EXEC_DIR} --no-cache-dir
+
+ENTRYPOINT ["python3", "-m", "polus.tabular.regression.rt_cetsa_analysis"]
+CMD ["--help"]
diff --git a/regression/rt-cetsa-analysis-tool/README.md b/regression/rt-cetsa-analysis-tool/README.md
new file mode 100644
index 0000000..acde158
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/README.md
@@ -0,0 +1,23 @@
+# RT_CETSA Analysis Tool (v0.1.0)
+
+This WIPP plugin runs statistical analysis for the RT-CETSA pipeline.
+
+## Building
+
+To build the Docker image for this plugin, run
+`./build-docker.sh`.
+
+## Install WIPP Plugin
+
+If WIPP is running, navigate to the plugins page and add a new plugin. Paste the contents of `plugin.json` into the pop-up window and submit.
+
+## Options
+
+This plugin takes two input arguments and two output arguments:
+
+| Name            | Description                                        | I/O    | Type        |
+|-----------------|----------------------------------------------------|--------|-------------|
+| `--inpDir`      | Input data collection to be processed by this tool | Input  | genericData |
+| `--filePattern` | File Pattern to parse input files                  | Input  | string      |
+| `--outDir`      | Output file                                        | Output | genericData |
+| `--preview`     | Generate JSON file with outputs                    | Output | JSON        |
diff --git a/regression/rt-cetsa-analysis-tool/VERSION b/regression/rt-cetsa-analysis-tool/VERSION
new file mode 100644
index 0000000..6e8bf73
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/VERSION
@@ -0,0 +1 @@
+0.1.0
diff --git a/regression/rt-cetsa-analysis-tool/build-docker.sh b/regression/rt-cetsa-analysis-tool/build-docker.sh
new file mode 100755
index 0000000..93d9b45
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/build-docker.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+version=$(<VERSION)
+docker build . -t polusai/rt-cetsa-analysis-tool:"${version}"
diff --git a/regression/rt-cetsa-analysis-tool/ict.yml b/regression/rt-cetsa-analysis-tool/ict.yml
new file mode 100644
index 0000000..50b7487
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/ict.yml
@@ -0,0 +1,52 @@
+author:
+- Nick Schaub
+- Antoine Gerardin
+- Najib Ishaq
+contact: nick.schaub@nih.gov
+container: polusai/rt-cetsa-analysis-tool:0.1.0
+description: Run statisical analysis for the RT-CETSA pipeline.
+entrypoint: python3 -m polus.tabular.regression.rt_cetsa_analysis
+inputs:
+- description: Input data collection to be processed by this tool
+  format:
+  - inpDir
+  name: inpDir
+  required: true
+  type: path
+- description: Filepattern to parse input files
+  format:
+  - pattern
+  name: pattern
+  required: false
+  type: string
+- description: Generate an output preview.
+  format:
+  - preview
+  name: preview
+  required: false
+  type: boolean
+name: polusai/RTCETSAAnalysis
+outputs:
+- description: Output collection
+  format:
+  - outDir
+  name: outDir
+  required: true
+  type: path
+repository: https://github.com/PolusAI/tabular-tools
+specVersion: 1.0.0
+title: RT-CETSA Analysis
+ui:
+- description: Input data collection
+  key: inputs.inpDir
+  title: Input data collection
+  type: path
+- description: Filepattern to parse input files
+  key: inputs.pattern
+  title: pattern
+  type: text
+- description: Generate an output preview.
+  key: inputs.preview
+  title: Preview example output of this plugin
+  type: checkbox
+version: 0.1.0
diff --git a/regression/rt-cetsa-analysis-tool/plugin.json b/regression/rt-cetsa-analysis-tool/plugin.json
new file mode 100644
index 0000000..a0e674e
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/plugin.json
@@ -0,0 +1,52 @@
+{
+  "name": "RT-CETSA Analysis",
+  "version": "0.1.0",
+  "title": "RT-CETSA Analysis",
+  "description": "Run statistical analysis for the RT-CETSA pipeline.",
+  "author": "Nicholas Schaub (nick.schaub@nih.gov), Antoine Gerardin (antoine.gerardin@nih.gov), Najib Ishaq (najib.ishaq@nih.gov)",
+  "institution": "National Center for Advancing Translational Sciences, National Institutes of Health",
+  "repository": "https://github.com/PolusAI/tabular-tools",
+  "website": "https://ncats.nih.gov/preclinical/core/informatics",
+  "citation": "",
+  "containerId": "polusai/rt-cetsa-analysis-tool:0.1.0",
+  "baseCommand": [
+    "python3",
+    "-m",
+    "polus.tabular.regression.rt_cetsa_analysis"
+  ],
+  "inputs": [
+    {
+      "name": "inpDir",
+      "type": "genericData",
+      "description": "Input data collection to be processed by this tool",
+      "required": true
+    },
+    {
+      "name": "filePattern",
+      "type": "string",
+      "description": "File Pattern to parse input files",
+      "default": ".+",
+      "required": false
+    }
+  ],
+  "outputs": [
+    {
+      "name": "outDir",
+      "type": "genericData",
+      "description": "Output data collection"
+    }
+  ],
+  "ui": [
+    {
+      "key": "inputs.inpDir",
+      "title": "Input collection",
+      "description": "Input data collection to be processed by this plugin"
+    },
+    {
+      "key": "inputs.filePattern",
+      "title": "File Pattern",
+      "description": "File Pattern to parse input files",
+      "default": ".+"
+    }
+  ]
+}
diff --git a/regression/rt-cetsa-analysis-tool/pyproject.toml b/regression/rt-cetsa-analysis-tool/pyproject.toml
new file mode 100644
index 0000000..2a7eda2
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/pyproject.toml
@@ -0,0 +1,77 @@
+[tool.poetry]
+name = "polus_tabular_regression_rt_cetsa_analysis"
+version = "0.1.0"
+description = "Run statistical analysis for the RT-CETSA pipeline."
+authors = [
+    "Nick Schaub <nick.schaub@nih.gov>",
+    "Antoine Gerardin <antoine.gerardin@nih.gov>",
+    "Najib Ishaq <najib.ishaq@nih.gov>",
+]
+readme = "README.md"
+packages = [{include = "polus", from = "src"}]
+
+[tool.poetry.dependencies]
+python = ">=3.9,<3.12"
+typer = "^0.7.0"
+filepattern = "^2.0.5"
+pandas = "^2.2.2"
+matplotlib = "^3.8.4"
+scipy = "^1.13.0"
+
+[tool.poetry.group.dev.dependencies]
+bump2version = "^1.0.1"
+pre-commit = "^3.1.0"
+pytest = "^7.2.1"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.ruff]
+extend = "../../ruff.toml"
+extend-ignore = [
+    "RET505",  # Unnecessary `else` after `return` statement
+    "E501",  # Line too long
+    "ANN001",  # Missing type annotation
+    "D102",  # Missing docstring in public method
+    "ANN201",  # Missing return type annotation
+    "N806",  # Variable in function should be lowercase
+    "D205",  # 1 blank line required between summary line and description
+    "N803",  # Argument name should be lowercase
+    "PLR0913",  # Too many arguments
+    "D415",  # First line should end with a period, question mark, or exclamation point
+    "PLR2004",  # Magic value used in comparison
+    "B006",  # Do not use mutable default arguments
+    "D107",  # Missing docstring
+    "D101",  # Missing docstring
+    "E731",  # Do not assign a lambda expression, use a def
+    "E402",  # Module level import not at top of file
+    "PTH123",  # `open()` should be replaced with `Path.open()`
+    "PTH118",  # `os.path.join()` should be replaced with `/` operator
+    "PTH100",  # `os.path.abspath()` should be replaced with `Path.resolve()`
+    "PLR0915",  # Too many statements
+    "PLR0912",  # Too many branches
+    "C901",  # Function is too complex
+    "T201",  # `print` used
+    "E722",  # Do not use bare 'except'
+    "B904",  # Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
+    "ANN202",  # Missing return type annotation for private function
+    "ARG002",  # Unused method argument
+    "N802",  # Function name should be lowercase
+    "PTH103",  # `os.makedirs()` should be replaced with `Path.mkdir(parents=True)`
+    "ANN003",  # Missing type annotation for `**kwargs`
+    "B007",  # Loop control variable not used within the loop body
+    "ANN204",  # Missing return type annotation for magic method
+    "D417",  # Missing argument descriptions in the docstring
+    "ANN205",  # Missing return type annotation for static method
+    "PLR5501",  # Use `elif` instead of `else` following `if` condition to avoid unnecessary indentation
+    "EM102",  # Exception must not use an f-string literal
+    "D414",  # Section has no content
+    "RUF012",  # Mutable class attributes should be annotated with `typing.ClassVar`
+    "A001",  # Variable `input` is shadowing a Python builtin
+    "A002",  # Argument `input` is shadowing a Python builtin
+    "E741",  # Ambiguous variable name: `l`
+    "PTH120",  # `os.path.dirname()` should be replaced by `Path.parent`
+    "N816",  # Variable `cfFilename` in global scope should not be mixedCase
+    "PTH109",  # `os.getcwd()` should be replaced by `Path.cwd()`
+]
diff --git a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
new file mode 100644
index 0000000..9bb5d18
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
@@ -0,0 +1,32 @@
+class: CommandLineTool
+cwlVersion: v1.2
+inputs:
+  inpDir:
+    inputBinding:
+      prefix: --inpDir
+    type: Directory
+  filePattern:
+    inputBinding:
+      prefix: --filePattern
+    type: string?
+  preview:
+    inputBinding:
+      prefix: --preview
+    type: boolean?
+  outDir:
+    inputBinding:
+      prefix: --outDir
+    type: Directory
+outputs:
+  outDir:
+    outputBinding:
+      glob: $(inputs.outDir.basename)
+    type: Directory
+requirements:
+  DockerRequirement:
+    dockerPull: polusai/rt-cetsa-analysis-tool:0.1.0
+  InitialWorkDirRequirement:
+    listing:
+    - entry: $(inputs.outDir)
+      writable: true
+  InlineJavascriptRequirement: {}
diff --git a/regression/rt-cetsa-analysis-tool/run-plugin.sh b/regression/rt-cetsa-analysis-tool/run-plugin.sh
new file mode 100755
index 0000000..c0aa711
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/run-plugin.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+version=$(<VERSION)
+datapath=$(readlink -f data)
+
+# Inputs
+inpDir=/data/input
+pattern=".*"
+
+# Output paths
+outDir=/data/output
+
+# Log level, must be one of ERROR, CRITICAL, WARNING, INFO, DEBUG
+LOGLEVEL=INFO
+
+docker run --mount type=bind,source=${datapath},target=/data/  \
+            --env POLUS_LOG=${LOGLEVEL} \
+            polusai/rt-cetsa-analysis-tool:${version} \
+            --inpDir ${inpDir} \
+            --filePattern ${pattern} \
+            --outDir ${outDir}

From 6233b6ed418097d41664ddf0eef50d6e556acc2f Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 14 May 2024 09:20:01 -0400
Subject: [PATCH 06/18] feat: rt_Cetsa analysis tool, containerized.

---
 regression/rt-cetsa-analysis-tool/Dockerfile  |   64 +-
 .../rt-cetsa-analysis-tool/build-docker.sh    |    2 +-
 regression/rt-cetsa-analysis-tool/ict.yml     |   42 +-
 regression/rt-cetsa-analysis-tool/plugin.json |   21 +-
 .../rt-cetsa-analysis-tool/pyproject.toml     |    7 +-
 .../requirements/Requirements_1.R             |    3 +
 .../requirements/Requirements_2.R             |    1 +
 .../requirements/Requirements_3.R             |    6 +
 .../requirements/Requirements_4.R             |    1 +
 .../requirements/Requirements_5.R             |    1 +
 .../requirements/Requirements_6.R             |    1 +
 .../rt_cetsa_analysis.cwl                     |   16 +-
 .../rt-cetsa-analysis-tool/run-plugin.sh      |    2 +-
 .../regression/rt_cetsa_analysis/__init__.py  |    1 +
 .../regression/rt_cetsa_analysis/__main__.py  |   96 +
 .../rt_cetsa_analysis/__main_filepattern__.py |   52 +
 .../regression/rt_cetsa_analysis/functions.R  | 1559 +++++++++++++++++
 .../regression/rt_cetsa_analysis/main.R       |   59 +
 .../rt_cetsa_analysis/main_analysis.R         |   92 +
 .../rt_cetsa_analysis/prepare_data.R          |  113 ++
 .../prepare_moltenprot_outputs.py             |    1 +
 .../rt_cetsa_analysis/prepare_params.R        |   78 +
 .../rt_cetsa_analysis/prepare_values.R        |   64 +
 .../rt_cetsa_analysis/run_rscript.py          |   42 +
 24 files changed, 2277 insertions(+), 47 deletions(-)
 mode change 100755 => 100644 regression/rt-cetsa-analysis-tool/Dockerfile
 create mode 100644 regression/rt-cetsa-analysis-tool/requirements/Requirements_1.R
 create mode 100644 regression/rt-cetsa-analysis-tool/requirements/Requirements_2.R
 create mode 100644 regression/rt-cetsa-analysis-tool/requirements/Requirements_3.R
 create mode 100644 regression/rt-cetsa-analysis-tool/requirements/Requirements_4.R
 create mode 100644 regression/rt-cetsa-analysis-tool/requirements/Requirements_5.R
 create mode 100644 regression/rt-cetsa-analysis-tool/requirements/Requirements_6.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/functions.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py

diff --git a/regression/rt-cetsa-analysis-tool/Dockerfile b/regression/rt-cetsa-analysis-tool/Dockerfile
old mode 100755
new mode 100644
index 0e29286..b41b225
--- a/regression/rt-cetsa-analysis-tool/Dockerfile
+++ b/regression/rt-cetsa-analysis-tool/Dockerfile
@@ -1,20 +1,60 @@
-FROM polusai/bfio:2.1.9
+FROM r-base:4.4.0
 
-# environment variables defined in polusai/bfio
-ENV EXEC_DIR="/opt/executables"
-ENV POLUS_IMG_EXT=".ome.tif"
-ENV POLUS_TAB_EXT=".csv"
-ENV POLUS_LOG="INFO"
 
-# Work directory defined in the base container
-WORKDIR ${EXEC_DIR}
+ARG EXEC_DIR="/opt/executables"
+
+RUN apt-get update && apt-get install -y python3
+
+#Create folders
+RUN mkdir -p ${EXEC_DIR}
+
+# Install R packages external dependencies
+RUN apt-get install -y libssl-dev \
+    && apt-get install -y libcurl4-openssl-dev libfontconfig1-dev \
+    && apt-get install -y libharfbuzz-dev libfribidi-dev \
+    && apt-get install -y libxml2-dev \
+    && apt-get install -y libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev \
+    && apt-get install -y cmake
+
+# Install required R packages
+ADD requirements/Requirements_1.R ${EXEC_DIR}/Requirements_1.R
+RUN Rscript ${EXEC_DIR}/Requirements_1.R
+
+ADD requirements/Requirements_2.R ${EXEC_DIR}/Requirements_2.R
+RUN Rscript ${EXEC_DIR}/Requirements_2.R
+
+ADD requirements/Requirements_3.R ${EXEC_DIR}/Requirements_3.R
+RUN Rscript ${EXEC_DIR}/Requirements_3.R
+
+RUN apt-get install -y --allow-downgrades libxcb-render0=1.15-1 libxcb-shm0=1.15-1 libxcb1=1.15-1
+RUN apt-get install -y  libcairo2-dev
+
+ADD requirements/Requirements_4.R ${EXEC_DIR}/Requirements_4.R
+RUN Rscript ${EXEC_DIR}/Requirements_4.R
+
+RUN apt-get install -y libgit2-dev
+
+ADD requirements/Requirements_6.R ${EXEC_DIR}/Requirements_6.R
+RUN Rscript ${EXEC_DIR}/Requirements_6.R
+
+ADD requirements/Requirements_5.R ${EXEC_DIR}/Requirements_5.R
+RUN Rscript ${EXEC_DIR}/Requirements_5.R
 
 COPY pyproject.toml ${EXEC_DIR}
-COPY VERSION ${EXEC_DIR}
 COPY README.md ${EXEC_DIR}
-COPY src ${EXEC_DIR}/src
+RUN apt-get update && apt-get install -y python3
+RUN apt-get -y install python3-pip python3-venv
 
-RUN pip3 install ${EXEC_DIR} --no-cache-dir
+COPY src/ ${EXEC_DIR}/src
+
+RUN python3 -m venv ${EXEC_DIR}/.venv
+# RUN chmod u+x ${EXEC_DIR}/.venv/bin/activate
+# RUN ${EXEC_DIR}/.venv/bin/activate
+RUN ${EXEC_DIR}/.venv/bin/pip3 install ${EXEC_DIR} --no-cache-dir
+
+
+WORKDIR ${EXEC_DIR}
 
-ENTRYPOINT ["python3", "-m", "polus.tabular.regression.rt_cetsa_analysis"]
+# CMD ["Rscript", "main.R"]
+ENTRYPOINT ["/opt/executables/.venv/bin/python3", "-m", "polus.tabular.regression.rt_cetsa_analysis"]
 CMD ["--help"]
diff --git a/regression/rt-cetsa-analysis-tool/build-docker.sh b/regression/rt-cetsa-analysis-tool/build-docker.sh
index 93d9b45..aaecd7a 100755
--- a/regression/rt-cetsa-analysis-tool/build-docker.sh
+++ b/regression/rt-cetsa-analysis-tool/build-docker.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 version=$(<VERSION)
-docker build . -t polusai/rt-cetsa-analysis-tool:"${version}"
+docker build . -t polusai/rt-cetsa-analysis-simple-tool:"${version}"
diff --git a/regression/rt-cetsa-analysis-tool/ict.yml b/regression/rt-cetsa-analysis-tool/ict.yml
index 50b7487..b001e9f 100644
--- a/regression/rt-cetsa-analysis-tool/ict.yml
+++ b/regression/rt-cetsa-analysis-tool/ict.yml
@@ -3,22 +3,28 @@ author:
 - Antoine Gerardin
 - Najib Ishaq
 contact: nick.schaub@nih.gov
-container: polusai/rt-cetsa-analysis-tool:0.1.0
+container: polusai/rt-cetsa-analysis-simple-tool:0.1.0
 description: Run statisical analysis for the RT-CETSA pipeline.
 entrypoint: python3 -m polus.tabular.regression.rt_cetsa_analysis
 inputs:
-- description: Input data collection to be processed by this tool
+- description: Fit params csv file
   format:
-  - inpDir
-  name: inpDir
+  - params
+  name: params
   required: true
   type: path
-- description: Filepattern to parse input files
+- description: Baseline corrected csv files
   format:
-  - pattern
-  name: pattern
-  required: false
-  type: string
+  - values
+  name: values
+  required: true
+  type: path
+- description: platemap excel file
+  format:
+  - platemap
+  name: platemap
+  required: true
+  type: path
 - description: Generate an output preview.
   format:
   - preview
@@ -37,14 +43,18 @@ repository: https://github.com/PolusAI/tabular-tools
 specVersion: 1.0.0
 title: RT-CETSA Analysis
 ui:
-- description: Input data collection
-  key: inputs.inpDir
-  title: Input data collection
+- description: Fit params csv file
+  key: inputs.params
+  title: Fit params csv file
+  type: path
+- description: baseline corrected csv file
+  key: inputs.values
+  title: baseline corrected csv file
+  type: path
+- description: platemap excel file
+  key: inputs.platemap
+  title: platemap excel file
   type: path
-- description: Filepattern to parse input files
-  key: inputs.pattern
-  title: pattern
-  type: text
 - description: Generate an output preview.
   key: inputs.preview
   title: Preview example output of this plugin
diff --git a/regression/rt-cetsa-analysis-tool/plugin.json b/regression/rt-cetsa-analysis-tool/plugin.json
index a0e674e..d59519a 100644
--- a/regression/rt-cetsa-analysis-tool/plugin.json
+++ b/regression/rt-cetsa-analysis-tool/plugin.json
@@ -8,7 +8,7 @@
   "repository": "https://github.com/PolusAI/tabular-tools",
   "website": "https://ncats.nih.gov/preclinical/core/informatics",
   "citation": "",
-  "containerId": "polusai/rt-cetsa-analysis-tool:0.1.0",
+  "containerId": "polusai/rt-cetsa-analysis-simple-tool:0.1.0",
   "baseCommand": [
     "python3",
     "-m",
@@ -16,17 +16,22 @@
   ],
   "inputs": [
     {
-      "name": "inpDir",
+      "name": "params",
       "type": "genericData",
-      "description": "Input data collection to be processed by this tool",
+      "description": "Fit params csv file",
       "required": true
     },
     {
-      "name": "filePattern",
-      "type": "string",
-      "description": "File Pattern to parse input files",
-      "default": ".+",
-      "required": false
+      "name": "values",
+      "type": "genericData",
+      "description": "Baseline corrected csv files",
+      "required": true
+    },
+    {
+      "name": "platemap",
+      "type": "genericData",
+      "description": "platemap excel file",
+      "required": true
     }
   ],
   "outputs": [
diff --git a/regression/rt-cetsa-analysis-tool/pyproject.toml b/regression/rt-cetsa-analysis-tool/pyproject.toml
index 2a7eda2..92595cf 100644
--- a/regression/rt-cetsa-analysis-tool/pyproject.toml
+++ b/regression/rt-cetsa-analysis-tool/pyproject.toml
@@ -12,11 +12,8 @@ packages = [{include = "polus", from = "src"}]
 
 [tool.poetry.dependencies]
 python = ">=3.9,<3.12"
-typer = "^0.7.0"
-filepattern = "^2.0.5"
-pandas = "^2.2.2"
-matplotlib = "^3.8.4"
-scipy = "^1.13.0"
+# filepattern = "2.0.4"
+typer = "^0.12.3"
 
 [tool.poetry.group.dev.dependencies]
 bump2version = "^1.0.1"
diff --git a/regression/rt-cetsa-analysis-tool/requirements/Requirements_1.R b/regression/rt-cetsa-analysis-tool/requirements/Requirements_1.R
new file mode 100644
index 0000000..cfd173c
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/requirements/Requirements_1.R
@@ -0,0 +1,3 @@
+install.packages('argparse')
+install.packages('logging')
+install.packages('tidyverse')
diff --git a/regression/rt-cetsa-analysis-tool/requirements/Requirements_2.R b/regression/rt-cetsa-analysis-tool/requirements/Requirements_2.R
new file mode 100644
index 0000000..60ee622
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/requirements/Requirements_2.R
@@ -0,0 +1 @@
+install.packages('drc')
diff --git a/regression/rt-cetsa-analysis-tool/requirements/Requirements_3.R b/regression/rt-cetsa-analysis-tool/requirements/Requirements_3.R
new file mode 100644
index 0000000..cba1975
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/requirements/Requirements_3.R
@@ -0,0 +1,6 @@
+install.packages('readxl')
+install.packages('stringr')
+install.packages('ggthemes')
+install.packages('cowplot')
+install.packages('ggpubr')
+install.packages('MESS')
diff --git a/regression/rt-cetsa-analysis-tool/requirements/Requirements_4.R b/regression/rt-cetsa-analysis-tool/requirements/Requirements_4.R
new file mode 100644
index 0000000..a779a3a
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/requirements/Requirements_4.R
@@ -0,0 +1 @@
+install.packages('hrbrthemes')
diff --git a/regression/rt-cetsa-analysis-tool/requirements/Requirements_5.R b/regression/rt-cetsa-analysis-tool/requirements/Requirements_5.R
new file mode 100644
index 0000000..f55228f
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/requirements/Requirements_5.R
@@ -0,0 +1 @@
+install.packages('devtools')
diff --git a/regression/rt-cetsa-analysis-tool/requirements/Requirements_6.R b/regression/rt-cetsa-analysis-tool/requirements/Requirements_6.R
new file mode 100644
index 0000000..caae61f
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/requirements/Requirements_6.R
@@ -0,0 +1 @@
+install.packages('gert')
diff --git a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
index 9bb5d18..e49f32b 100644
--- a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
+++ b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
@@ -5,10 +5,18 @@ inputs:
     inputBinding:
       prefix: --inpDir
     type: Directory
-  filePattern:
+  params:
     inputBinding:
-      prefix: --filePattern
-    type: string?
+      prefix: --params
+    type: string
+  values:
+    inputBinding:
+      prefix: --values
+    type: string
+  platemap:
+    inputBinding:
+      prefix: --platemap
+    type: File
   preview:
     inputBinding:
       prefix: --preview
@@ -24,7 +32,7 @@ outputs:
     type: Directory
 requirements:
   DockerRequirement:
-    dockerPull: polusai/rt-cetsa-analysis-tool:0.1.0
+    dockerPull: polusai/rt-cetsa-analysis-simple-tool:0.1.0
   InitialWorkDirRequirement:
     listing:
     - entry: $(inputs.outDir)
diff --git a/regression/rt-cetsa-analysis-tool/run-plugin.sh b/regression/rt-cetsa-analysis-tool/run-plugin.sh
index c0aa711..64468bc 100755
--- a/regression/rt-cetsa-analysis-tool/run-plugin.sh
+++ b/regression/rt-cetsa-analysis-tool/run-plugin.sh
@@ -14,7 +14,7 @@ LOGLEVEL=INFO
 
 docker run --mount type=bind,source=${datapath},target=/data/  \
             --env POLUS_LOG=${LOGLEVEL} \
-            polusai/rt-cetsa-analysis-tool:${version} \
+            polusai/rt-cetsa-analysis-simple-tool:${version} \
             --inpDir ${inpDir} \
             --filePattern ${pattern} \
             --outDir ${outDir}
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
new file mode 100644
index 0000000..98fb3e2
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
@@ -0,0 +1 @@
+"""RT CETSA Analysis."""
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
new file mode 100644
index 0000000..30cd581
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
@@ -0,0 +1,96 @@
+"""CLI for rt-cetsa-analysis-tool."""
+
+import logging
+import os
+from pathlib import Path
+
+import typer
+from polus.tabular.regression.rt_cetsa_analysis.run_rscript import run_rscript
+
+# get env
+POLUS_LOG = os.environ.get("POLUS_LOG", logging.INFO)
+POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".csv")
+
+# Initialize the logger
+logging.basicConfig(
+    format="%(asctime)s - %(name)-8s - %(levelname)-8s - %(message)s",
+    datefmt="%d-%b-%y %H:%M:%S",
+)
+logger = logging.getLogger("rt_cetsa_analysis")
+logger.setLevel(POLUS_LOG)
+
+app = typer.Typer()
+
+
+@app.command()
+def main(
+    inp_dir: Path = typer.Option(
+        ...,
+        "--inpDir",
+        help="Input directory containing the all data files.",
+        exists=True,
+        dir_okay=True,
+        readable=True,
+        resolve_path=True,
+    ),
+    params_pattern: str = typer.Option(
+        ...,
+        "--params",
+        help="name of the molten fit params csv file in the input directory.",
+    ),
+    values_pattern: str = typer.Option(
+        ...,
+        "--values",
+        help="name of the baseline corrected values csv file in the input directory.",
+    ),
+    platemap: Path = typer.Option(
+        ...,
+        "--platemap",
+        help="Path to the platemap file.",
+        exists=True,
+        readable=True,
+        resolve_path=True,
+    ),
+    preview: bool = typer.Option(
+        False,
+        "--preview",
+        help="Preview the files that will be processed.",
+    ),
+    out_dir: Path = typer.Option(
+        ...,
+        "--outDir",
+        help="Output directory to save the results.",
+        exists=True,
+        dir_okay=True,
+        writable=True,
+        resolve_path=True,
+    ),
+) -> None:
+    """CLI for rt-cetsa-moltprot-tool."""
+    # TODO: Add to docs that input csv file should be sorted by `Temperature` column.
+    logger.info("Starting the CLI for rt-cetsa-moltprot-tool.")
+
+    logger.info(f"Input directory: {inp_dir}")
+    logger.info(f"params_pattern: {params_pattern}")
+    logger.info(f"values_pattern: {values_pattern}")
+    logger.info(f"platemap path: {platemap}")
+    logger.info(f"Output directory: {out_dir}")
+
+    params = inp_dir / params_pattern
+    values = inp_dir / values_pattern
+
+    if preview:
+        NotImplemented  # noqa:  B018
+
+    if not params.exists():
+        raise FileNotFoundError(f"params file not found : {params}")
+    if not values.exists():
+        raise FileNotFoundError(f"values file not found : {values}")
+    if not platemap.exists():
+        raise FileNotFoundError(f"platemap file not found : {platemap}")
+
+    run_rscript(params, values, platemap, out_dir)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py
new file mode 100644
index 0000000..d150462
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py
@@ -0,0 +1,52 @@
+"""Main with filepattern support."""
+# """CLI for rt-cetsa-moltprot-tool."""
+
+
+# # get env
+
+# # Initialize the logger
+# logging.basicConfig(
+
+
+# @app.command()
+# def main(
+#     inp_dir: pathlib.Path = typer.Option(
+#         ...,
+#         "--inpDir",
+#     ),
+#     params_pattern: str = typer.Option(
+#         ".+",
+#         "--params",
+#     ),
+#     values_pattern: str = typer.Option(
+#         ".+",
+#         "--values",
+#     ),
+#     platemap_pattern: str = typer.Option(
+#         ".+",
+#         "--platemap",
+#     ),
+#     preview: bool = typer.Option(
+#         False,
+#         "--preview",
+#     ),
+#     out_dir: pathlib.Path = typer.Option(
+#         ...,
+#         "--outDir",
+#     ),
+# ) -> None:
+#     """CLI for rt-cetsa-moltprot-tool."""
+#     # TODO: Add to docs that input csv file should be sorted by `Temperature` column.
+
+
+#     if preview:
+#         with (out_dir / "preview.json").open("w") as f:
+
+#     for params, values, platemap in zip(params_files, values_files, platemap_files):
+#         # TODO replace with exceptions
+#         if len(params[1]) != 1 or len(values[1]) != 1 or len(platemap[1]) != 1:
+#             raise Exception(
+#                 msg,
+
+
+# if __name__ == "__main__":
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/functions.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/functions.R
new file mode 100644
index 0000000..cb2be4d
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/functions.R
@@ -0,0 +1,1559 @@
+# #  @@@@@@@   @@@@@@@              @@@@@@@  @@@@@@@@  @@@@@@@   @@@@@@    @@@@@@
+# #  @@@@@@@@  @@@@@@@             @@@@@@@@  @@@@@@@@  @@@@@@@  @@@@@@@   @@@@@@@@
+# #  @@!  @@@    @@!               !@@       @@!         @@!    !@@       @@!  @@@
+# #  !@!  @!@    !@!               !@!       !@!         !@!    !@!       !@!  @!@
+# #  @!@!!@!     @!!    @!@!@!@!@  !@!       @!!!:!      @!!    !!@@!!    @!@!@!@!
+# #  !!@!@!      !!!    !!!@!@!!!  !!!       !!!!!:      !!!     !!@!!!   !!!@!!!!
+# #  !!: :!!     !!:               :!!       !!:         !!:         !:!  !!:  !!!
+# #  :!:  !:!    :!:               :!:       :!:         :!:        !:!   :!:  !:!
+# #  ::   :::     ::                ::: :::   :: ::::     ::    :::: ::   ::   :::
+# # Isothermal Analysis of CETSA/RT-CETSA Experimental Sets
+# #
+# # Plate assignment, data cleanup, and functions
+# # Patents: PCT/US21/45184, HHS E-022-2022-0-US-01
+
+print("####### loading all functions used in the analysis...")
+
+library(tidyverse)
+library(readxl)
+library(stringr)
+library(drc)
+library(ggthemes)
+library(cowplot)
+library(hrbrthemes)
+library(ggpubr)
+library(MESS)
+library(devtools)
+# load_all(".");
+#' construct_grid
+#' Construct a grid with compatible headers for MoltenProt file prep
+#'
+#' @param row_num Number of rows in microplate
+#' @param col_num Number of columns in microplate
+#' @param pad_num Add padding 0 to well address?
+#'
+#' @return df containing the grid
+#' @export
+#'
+#' @examples
+#' construct_grid(16,24)
+#' construct_grid(32,48,TRUE)
+construct_grid <-
+  function(row_num = 16,
+           col_num = 24,
+           pad_num = FALSE) {
+    # Column labels: plain integers (A1) or zero-padded to two digits (A01).
+    # The padded branch used to build an unused, hard-coded 16 x 24 frame and
+    # left `grid` unassigned, so pad_num = TRUE failed at return(grid).
+    # Row labels come from LETTERS, so row_num above 26 yields NA labels.
+    col_ids <- c(1:(col_num))
+    if (pad_num != FALSE) {
+      col_ids <- str_pad(col_ids, 2, pad = '0')
+    }
+    # expand.grid() varies `row` fastest; sorting on `row` alone then gives
+    # one block per plate row with the columns in their original order.
+    grid <-
+      expand.grid(row = LETTERS[1:(row_num)], col = col_ids) %>%
+      arrange(row) %>%
+      mutate(address = paste(row, col, sep = '')) %>%
+      dplyr::select(-c('row', 'col'))
+    return(grid)
+  }
+
+#' prepMatLabforMolt
+#'
+#' Need to provide location of file, usually in ./data/, and sheet location
+#'
+#' The sheet is read with one row per well; its first two columns are dropped
+#' and the remaining columns are treated as consecutive temperature readings.
+#' Side effect: the four 96-column quadrants of the plate are written to
+#' ./data/cleaned_expt1.csv ... ./data/cleaned_expt4.csv.
+#'
+#' @param file_loc Location of raw Matlab data file
+#' @param sheet What sheet in the .xlsx is the data located
+#' @param col_names Are there column names in the data sheet
+#' @param start_temp Start temp for the experiment
+#' @param end_temp End temp for the experiment
+#'
+#' @return df containing the raw values for the RT-CETSA experiment
+#'   (one row per reading: `Temperature` plus one column per well)
+#' @export
+#'
+#' @examples
+#' prepMatLabforMolt(file_loc = './data/210318_plate3.xlsx',
+#' start_temp = startTemp,
+#' end_temp = endTemp)
+prepMatLabforMolt <- function(file_loc = './data/rtcetsa_raw.xlsx',
+                              sheet = 'Sheet1',
+                              col_names = FALSE,
+                              start_temp = 37,
+                              end_temp = 90) {
+  if (!file.exists(file_loc)) {
+    stop('File does not exist at path supplied.')
+  }
+  df <-
+    read_excel(
+      path = file_loc,
+      sheet = sheet,
+      col_names = col_names,
+      .name_repair = 'unique'
+    )
+  if (nrow(df) == 0 || ncol(df) == 0) {
+    stop('Imported file is empty. Please navigate to correct RT-CETSA file')
+  }
+  # Drop the first two sheet columns and use the row number as the well id.
+  df <- df %>%
+    dplyr::select(-c('...1', '...2')) %>%
+    rownames_to_column() %>%
+    rename('well' = 'rowname')
+
+  # Construct temperature index (t_n): column 1 is `well`, every following
+  # column is one reading and is labelled t_1, t_2, ... in order.
+  names(df)[-1] <- paste('t_', seq_len(ncol(df) - 1), sep = '')
+
+  # Pivot around the data to tidy: one row per reading, one column per well.
+  # The reading index parsed from 't_n' is a provisional Temperature value.
+  df <- df %>%
+    pivot_longer(., cols = 2:ncol(df)) %>%
+    pivot_wider(names_from = well) %>%
+    rename(., 'Temperature' = 'name') %>%
+    mutate(., Temperature = as.integer(gsub("[^0-9.]", "", Temperature)))
+
+  #Create temperature index in line with experimental parameters supplied in main script
+  # (an even ramp from start_temp to end_temp, one value per reading, rounded to 0.1)
+  temperature_df <-
+    seq(start_temp, end_temp, by = ((end_temp - start_temp) / (nrow(df) - 1))) %>%
+    round(., digits = 1)
+  df$Temperature[seq_along(temperature_df)] <- temperature_df
+
+  # Assemble data for moltenprot analysis by splitting 384-well plate to 96-well plate with appropriate index
+  # Each quadrant is the Temperature column plus 96 consecutive well columns,
+  # relabelled with the 96-well addresses produced by construct_grid().
+  grid_96w <- construct_grid(row_num = 8, col_num = 12)
+
+  q1 <- df %>%
+    dplyr::select(., 1, 2:97)
+  colnames(q1)[-1] <- grid_96w$address
+
+  q2 <- df %>%
+    dplyr::select(., 1, 98:193)
+  colnames(q2)[-1] <- grid_96w$address
+
+  q3 <- df %>%
+    dplyr::select(., 1, 194:289)
+  colnames(q3)[-1] <- grid_96w$address
+
+  q4 <- df %>%
+    dplyr::select(., 1, 290:385)
+  colnames(q4)[-1] <- grid_96w$address
+
+  # All four quadrants are built before anything is written, so a sheet with
+  # too few wells fails without leaving partial output behind.
+  write.csv(q1, './data/cleaned_expt1.csv', row.names = FALSE)
+  write.csv(q2, './data/cleaned_expt2.csv', row.names = FALSE)
+  write.csv(q3, './data/cleaned_expt3.csv', row.names = FALSE)
+  write.csv(q4, './data/cleaned_expt4.csv', row.names = FALSE)
+
+  return(df)
+}
+
+# Read the MoltenProt 'Fit parameters' of the four quadrant runs into one 384-row frame; kept columns depend on `model`
+retrieveMoltenData <-
+  function(model = 'standard',
+           plate_format = 384) {  # NOTE(review): plate_format is never read below
+    # Row/column labels '01'..'16' x '01'..'24', sorted by row, bound to the combined results
+    col_by_row <-
+      expand.grid(row = sprintf('%.2d', 1:16), col = sprintf('%.2d', 1:24)) %>%
+      arrange(., row)
+    if (model == 'standard') {  # drops ID, *_init, baseline fit, S and dCp_component columns
+      exp1_param <-
+        read_excel('./data/cleaned_expt1/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      exp2_param <-
+        read_excel('./data/cleaned_expt2/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      exp3_param <-
+        read_excel('./data/cleaned_expt3/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      exp4_param <-
+        read_excel('./data/cleaned_expt4/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      # Zero-pad single-digit well numbers in ID (A1 -> A01) so that arrange(ID) sorts A01, A02, ..., A10
+      exp1_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp1_param$ID, perl = TRUE)  # group 1, literal 0, group 2
+      exp1_param <- exp1_param %>% arrange(ID)
+      exp2_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp2_param$ID, perl = TRUE)
+      exp2_param <- exp2_param %>% arrange(ID)
+      exp3_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp3_param$ID, perl = TRUE)
+      exp3_param <- exp3_param %>% arrange(ID)
+      exp4_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp4_param$ID, perl = TRUE)
+      exp4_param <- exp4_param %>% arrange(ID)
+      # Stack the four runs in order (expt1..expt4) and number the rows as `well`
+      exp_param_full <-
+        exp1_param %>% rbind(., exp2_param, exp3_param, exp4_param) %>%
+        rownames_to_column() %>% rename('well' = 'rowname') %>%
+        dplyr::select(
+          -c(
+            'ID',
+            'kN_init',
+            'bN_init',
+            'kU_init',
+            'bU_init',
+            'dHm_init',
+            'Tm_init',
+            'kN_fit',
+            'bN_fit',
+            'kU_fit',
+            'bU_fit',
+            'S',
+            'dCp_component'
+          )
+        ) %>%
+        bind_cols(col_by_row) %>%
+        relocate(c('row', 'col'), .after = well) %>%
+        dplyr::select(-'well')  # the running row number is replaced by row/col
+      exp_param_full <- well_assignment(exp_param_full, 384)  # layout is fixed at 384 here
+      return(exp_param_full)
+    }
+    if (model == 'irrev') {  # keeps only the columns listed in the select() below
+      exp1_param <-
+        read_excel('./data/cleaned_expt1/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      exp2_param <-
+        read_excel('./data/cleaned_expt2/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      exp3_param <-
+        read_excel('./data/cleaned_expt3/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      exp4_param <-
+        read_excel('./data/cleaned_expt4/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit parameters') %>%
+        dplyr::select(-c('Condition'))
+      # Zero-pad single-digit well numbers in ID (A1 -> A01) so that arrange(ID) sorts A01, A02, ..., A10
+      exp1_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp1_param$ID, perl = TRUE)
+      exp1_param <- exp1_param %>% arrange(ID)
+      exp2_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp2_param$ID, perl = TRUE)
+      exp2_param <- exp2_param %>% arrange(ID)
+      exp3_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp3_param$ID, perl = TRUE)
+      exp3_param <- exp3_param %>% arrange(ID)
+      exp4_param$ID <-
+        gsub('([A-Z])(\\d)(?!\\d)', '\\10\\2\\3', exp4_param$ID, perl = TRUE)
+      exp4_param <- exp4_param %>% arrange(ID)
+      # Stack the four runs in order (expt1..expt4) and number the rows as `well`
+      exp_param_full <-
+        exp1_param %>% rbind(., exp2_param, exp3_param, exp4_param) %>%
+        rownames_to_column() %>% rename('well' = 'rowname') %>%
+        dplyr::select(c(
+          'well',
+          'Ea_fit',
+          'Tf_fit',
+          'kN_fit',
+          'bN_fit',
+          'kU_fit',
+          'bU_fit',
+          'S'
+        )) %>%
+        bind_cols(col_by_row) %>%
+        relocate(c('row', 'col'), .after = well) %>%
+        dplyr::select(-'well')
+      exp_param_full <- well_assignment(exp_param_full, 384)
+      return(exp_param_full)
+    }
+  }  # any other `model` value matches neither branch and yields NULL
+
+# Gather the per-well curves of the four quadrant runs into one frame with wells as rows and 't_<temp>' columns
+retrieve_FittedCurves <-
+  function(model = 'baseline-fit',
+           start_temp = 37,
+           end_temp = 90) {
+    col_by_row <-
+      expand.grid(row = sprintf('%.2d', 1:16), col = sprintf('%.2d', 1:24)) %>%
+      arrange(., row)
+    if (model == 'baseline-fit') {  # reads the 'Baseline-corrected' sheet
+      exp1_curve <-
+        read_excel('./data/cleaned_expt1/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Baseline-corrected')  # the first run keeps its Temperature column
+      exp2_curve <-
+        read_excel('./data/cleaned_expt2/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Baseline-corrected') %>%
+        dplyr::select(-c('Temperature'))
+      exp3_curve <-
+        read_excel('./data/cleaned_expt3/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Baseline-corrected') %>%
+        dplyr::select(-c('Temperature'))
+      exp4_curve <-
+        read_excel('./data/cleaned_expt4/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Baseline-corrected') %>%
+        dplyr::select(-c('Temperature'))
+      exp_curve_all <-
+        cbind(
+          xp1 = exp1_curve,
+          xp2 = exp2_curve,
+          xp3 = exp3_curve,
+          xp4 = exp4_curve
+        ) %>%
+        rename(., Temperature = xp1.Temperature) %>%  # named cbind prefixes the columns, hence xp1.Temperature
+        mutate(., Temperature = paste('val_t_', Temperature, sep = ''))
+      exp_curve_all <- exp_curve_all %>%
+        pivot_longer(cols = 2:ncol(exp_curve_all)) %>%
+        pivot_wider(names_from = Temperature) %>%  # now one row per well, one column per temperature
+        rownames_to_column() %>% rename('well' = 'rowname') %>%
+        bind_cols(col_by_row) %>%
+        dplyr::select(-c('name', 'well', 'row', 'col')) %>%  # the row/col labels bound above are dropped again
+        add_tempheaders(., start_temp, end_temp)  # columns renamed to an even start..end ramp
+      message('Fit curves retrieved.')
+      return(exp_curve_all)
+    }
+    if (model == 'fit_curves') {  # same steps as above, reading the 'Fit curves' sheet
+      exp1_curve <-
+        read_excel('./data/cleaned_expt1/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit curves')
+      exp2_curve <-
+        read_excel('./data/cleaned_expt2/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit curves') %>%
+        dplyr::select(-c('Temperature'))
+      exp3_curve <-
+        read_excel('./data/cleaned_expt3/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit curves') %>%
+        dplyr::select(-c('Temperature'))
+      exp4_curve <-
+        read_excel('./data/cleaned_expt4/Signal_resources/Signal_results.xlsx',
+                   sheet = 'Fit curves') %>%
+        dplyr::select(-c('Temperature'))
+      exp_curve_all <-
+        cbind(
+          xp1 = exp1_curve,
+          xp2 = exp2_curve,
+          xp3 = exp3_curve,
+          xp4 = exp4_curve
+        ) %>%
+        rename(., Temperature = xp1.Temperature) %>%
+        mutate(., Temperature = paste('val_t_', Temperature, sep = ''))
+      exp_curve_all <- exp_curve_all %>%
+        pivot_longer(cols = 2:ncol(exp_curve_all)) %>%
+        pivot_wider(names_from = Temperature) %>%
+        rownames_to_column() %>% rename('well' = 'rowname') %>%
+        bind_cols(col_by_row) %>%
+        dplyr::select(-c('name', 'well', 'row', 'col')) %>%
+        add_tempheaders(., start_temp, end_temp)
+      message('Fit curves retrieved.')
+      return(exp_curve_all)
+    }
+  }  # any other `model` value matches neither branch and yields NULL
+
+# Column-bind the fit-parameter frame and the fitted-curve frame into one analysis df
+bind_fulldf <- function(param_df, curve_df) {
+  combined <- cbind(param_df, curve_df)
+  combined
+}
+
+# Convert the Kelvin columns from MoltenProt (Tm_fit, T_onset) to Celsius; returns the converted df visibly
+kelToCel <- function(df) {
+  df <- df %>%
+    mutate(Tm_fit = Tm_fit - 273.15, T_onset = T_onset - 273.15)
+  return(df)
+}
+# #
+# ISO-CETSA Functions (new)
+# #
+
+# Rename every column of df to 't_<temp>' along an even start_temp..end_temp ramp
+add_tempheaders <- function(df,
+                            start_temp = 37,
+                            end_temp = 90) {
+  # length.out (rather than a computed `by`) also handles a single column,
+  # where the old step (end - start) / 0 made seq() fail.
+  n_points <- ncol(df)
+  if (n_points > 0) {
+    temperature_df <-
+      round(seq(start_temp, end_temp, length.out = n_points), digits = 1)
+    colnames(df) <- paste('t_', temperature_df, sep = '')
+  }
+  message('Temperature assignments changed for ', ncol(df), ' points.')
+  return(df)
+}
+
+# Add row and column to a tidy dataframe (columns are each temperatures, rows are wells/conditions)
+# The labels are zero-padded strings ('01', '02', ...) ordered row by row.
+add_rowcol <- function(df, well_num) {
+  # Supported plate layouts as c(rows, columns).
+  plate_dims <- list('96' = c(8, 12), '384' = c(16, 24))
+  dims <- plate_dims[[as.character(well_num)]]
+  if (is.null(dims)) {
+    # An unsupported size used to fall through and fail at cbind() with
+    # "object 'col_by_row' not found"; report the real problem instead.
+    stop('Unsupported well_num: ', well_num, '. Expected 96 or 384.')
+  }
+  col_by_row <-
+    expand.grid(row = sprintf('%.2d', 1:dims[1]),
+                col = sprintf('%.2d', 1:dims[2])) %>%
+    arrange(., row)
+  message('Row + Column assignments created for ', well_num, '-well plate')
+  df <- cbind(col_by_row, df)
+  return(df)
+}
+
+# Add well assignments for each plate
+# Prepends a `well` column holding zero-padded addresses (A01, A02, ...),
+# generated row by row, to df. Supported layouts: 96 (8 x 12), 384 (16 x 24).
+well_assignment <- function(df, well_num) {
+  if (well_num == 96) {
+    letter <- LETTERS[1:8]
+    number <- c(1:12)
+  }
+  else if (well_num == 384) {
+    letter <- LETTERS[1:16]
+    number <- c(1:24)
+  }
+  else {
+    # An unsupported size used to fall through and fail at cbind() with
+    # "object 'temp_df' not found"; report the real problem instead.
+    stop('Unsupported well_num: ', well_num, '. Expected 96 or 384.')
+  }
+  number <- str_pad(number, 2, pad = '0')
+
+  # Size the label column from the layout. The 96-well branch used to
+  # allocate 384 rows and fill only the first 96, leaving the placeholder
+  # values 97..384 in `well` and a row count that no longer matched a
+  # 96-row df when the two were bound together.
+  temp_df <- tibble(well = paste(
+    rep(letter, each = length(number)),
+    rep(number, times = length(letter)),
+    sep = ''
+  ))
+  message('Well assignments created for ', well_num, '-well plate.')
+  df <- cbind(temp_df, df)
+  return(df)
+}
+
+# Assign compound ids and concentration from platemap
+plate_assignment <- function(df, platemap_file) {
+  sample_wide <- read_excel(platemap_file, sheet = 'sample') %>%
+    dplyr::select(-1)  # the first sheet column is dropped
+  id_df <- sample_wide %>%
+    pivot_longer(., cols = 1:ncol(sample_wide)) %>%
+    dplyr::select(ncgc_id = value)
+  id_df$ncgc_id <- gsub('empty', 'vehicle', id_df$ncgc_id)
+  conc_wide <- read_excel(platemap_file, sheet = 'conc') %>%
+    dplyr::select(-1)
+  conc_df <- conc_wide %>%
+    pivot_longer(., cols = 1:ncol(conc_wide)) %>%
+    dplyr::select(conc = value)
+  df <- cbind(id_df, conc_df, df)
+  message('Plate assignment attached to dataframe.')
+  df$row <- as.numeric(df$row)
+  df$col <- as.numeric(df$col)
+  return(df)
+}
+
+# Calculate AUC for each well
+print("####### loading calculate_auc...")
+calculate_auc <- function(df) {
+  #Retrieve temperatures to be used for AUC determination.
+  auc.df <- df %>%
+    dplyr::select(matches('t_\\d'))
+  # Every row shares the same 't_<temperature>' headers, so the x axis is
+  # parsed once here instead of re-pivoting the frame for each well.
+  temps <- as.numeric(sub('t_', '', colnames(auc.df)))
+
+  #Initialize the AUC column
+  df$auc <- NA
+
+  # seq_len() (unlike 1:nrow) runs zero times when there are no rows.
+  # auc() is presumably MESS::auc, attached at the top of this file.
+  for (i in seq_len(nrow(auc.df))) {
+    # drop = FALSE keeps a one-row frame even when a single column matched.
+    response <-
+      as.numeric(unlist(auc.df[i, , drop = FALSE], use.names = FALSE))
+    df$auc[i] <- auc(x = temps, y = response)
+  }
+  message('AUC Values calculated for ', nrow(auc.df), ' wells.')
+  return(df)
+}
+
+# Select the control wells: rows whose ncgc_id equals `control` or `pc`.
+# Returns them without the `conc` column, or NULL (after a message) if none match.
+control_grouping <- function(df, control = 'DMSO', pc = 'control') {
+  control_df <- df %>%
+    filter(ncgc_id == control | ncgc_id == pc)
+  if (nrow(control_df) == 0) {
+    # message() returns NULL invisibly, and that is passed on to the caller.
+    return(message('No control wells found. Review control input to function.'))
+  }
+  return(dplyr::select(control_df, -'conc'))
+}
+
+# Per-column mean, sd and %CV of the negative (`nc`) and positive (`pc`) control wells.
+# Returns one row per measurement column, labelled in `analysis_method`.
+control_variability <-
+  function(df, nc = 'vehicle', pc = 'control') {
+    # Split the controls into their own frames, keeping only measurement columns
+    nc.controls.df <- dplyr::select(filter(df, ncgc_id == nc),
+                                    -c('ncgc_id', 'well', 'row', 'col'))
+    pc.controls.df <- dplyr::select(filter(df, ncgc_id == pc),
+                                    -c('ncgc_id', 'well', 'row', 'col'))
+
+    # Column-wise summary statistics for each control group
+    nc.mean.df <- apply(nc.controls.df, 2, mean)
+    nc.sd.df <- apply(nc.controls.df, 2, sd)
+    pc.mean.df <- apply(pc.controls.df, 2, mean)
+    pc.sd.df <- apply(pc.controls.df, 2, sd)
+
+    # Coefficient of variation in percent: sd / mean * 100
+    nc.var.df <- mutate(tibble(nc.mean = nc.mean.df, nc.sd = nc.sd.df),
+                        nc.cv = (nc.sd / nc.mean) * 100)
+    pc.var.df <- mutate(tibble(pc.mean = pc.mean.df, pc.sd = pc.sd.df),
+                        pc.cv = (pc.sd / pc.mean) * 100)
+
+    # Label each row with the name of the column it summarises
+    analysis_method <- colnames(nc.controls.df)
+    var_df <- cbind(analysis_method, nc.var.df, pc.var.df)
+    message('Control group variability analyzed.')
+    return(var_df)
+  }
+
+# Returns thermogram with mean/sd of DMSO curve across temps
+# Plots the negative-control (nc.*) and positive-control (pc.*) mean curves
+# with +/- 1 sd error bars against temperature, prints the plot and returns it.
+# NOTE: pcTm and ncTm are accepted but not used anywhere in the body.
+control_thermogram <- function(df, pcTm, ncTm) {
+  # Keep the per-temperature rows ('t_<temp>') and recover the numeric temperature
+  subset_df <- subset(df, grepl('t_', analysis_method)) %>%
+    mutate(temp = as.numeric(gsub('t_', '', analysis_method))) %>%
+    dplyr::select(-'analysis_method')
+
+  # Negative control: mean line, error bars, then points drawn on top
+  nc_layers <- list(
+    geom_line(aes(y = nc.mean), size = 1.5, alpha = 0.75, color = '#88CCEE'),
+    geom_errorbar(aes(ymin = nc.mean - nc.sd, ymax = nc.mean + nc.sd),
+                  size = 0.5,
+                  width = 1),
+    geom_point(aes(y = nc.mean), size = 3.25, shape = 21,
+               color = 'black', fill = '#88CCEE')
+  )
+  # Positive control: same three layers in the second colour pair
+  pc_layers <- list(
+    geom_line(aes(y = pc.mean), size = 1.5, alpha = 0.75, color = '#882255'),
+    geom_errorbar(aes(ymin = pc.mean - pc.sd, ymax = pc.mean + pc.sd),
+                  size = 0.5,
+                  width = 1),
+    geom_point(aes(y = pc.mean), size = 3.25, shape = 21,
+               color = 'black', fill = '#EE3377')
+  )
+
+  # Adding a list to a ggplot appends its layers in order
+  therm_plot <- ggplot(subset_df, aes(x = temp)) +
+    nc_layers +
+    pc_layers +
+    theme_minimal() +
+    labs(title = 'Control Thermograms',
+         x = 'Temperature [C]',
+         y = 'Fraction Unfolded')
+
+  print(therm_plot)
+  return(therm_plot)
+}
+
+# Controls analysis and z' output for groups
+# Z' = 1 - (3 * sd(pc) + 3 * sd(nc)) / abs(mean(pc) - mean(nc)), reported via
+# message() for Tm_fit and auc.
+# Possible outputs:
+# output = 'plot': Cowplot of controls (also saved as <outdir>/controls.png)
+# output = 'df': Control dataframe (negative-control means of Tm_fit and auc)
+# Any other value of `output` only reports the Z' values and returns NULL.
+# `controlDF` is handed to control_thermogram() and only used for 'plot'.
+# NOTE: `outdir` is not a parameter; it is looked up outside this function.
+control_analysis <-
+  function(df,
+           nc = 'vehicle',
+           pc = 'control',
+           output = '',
+           controlDF) {
+    controls.df <- df %>%
+      filter(ncgc_id == nc | ncgc_id == pc)
+    # Row masks for the two control groups, reused for every statistic below
+    is.nc <- controls.df$ncgc_id == nc
+    is.pc <- controls.df$ncgc_id == pc
+
+    #Calculate Z' from controls for each parameter
+    Tm.nc.mean <- mean(controls.df$Tm_fit[is.nc])
+    Tm.nc.sd <- sd(controls.df$Tm_fit[is.nc])
+    Tm.pc.mean <- mean(controls.df$Tm_fit[is.pc])
+    Tm.pc.sd <- sd(controls.df$Tm_fit[is.pc])
+    Tm.z <-
+      1 - (((3 * Tm.pc.sd) + (3 * Tm.nc.sd)) / abs(Tm.pc.mean - Tm.nc.mean))
+    message('Z\' for Tm: ', signif(Tm.z))
+
+    auc.nc.mean <- mean(controls.df$auc[is.nc])
+    auc.nc.sd <- sd(controls.df$auc[is.nc])
+    auc.pc.mean <- mean(controls.df$auc[is.pc])
+    auc.pc.sd <- sd(controls.df$auc[is.pc])
+    auc.z <-
+      1 - (((3 * auc.pc.sd) + (3 * auc.nc.sd)) / abs(auc.pc.mean - auc.nc.mean))
+    message('Z\' for AUC: ', signif(auc.z))
+
+    if (output == 'plot') {
+      # Both box plots share the same theme tweaks.
+      control.theme <- theme(
+        legend.position = 'none',
+        axis.title.x = element_blank(),
+        axis.text.x = element_text(size = 12, face = 'bold'),
+        axis.text.y = element_text(size = 10),
+        axis.title.y = element_text(size = 12, face = 'bold'),
+        plot.title = element_text(size = 12, face = 'bold')
+      )
+      Tm.plot <-
+        ggplot(controls.df, aes(x = ncgc_id, y = Tm_fit, fill = ncgc_id)) +
+        geom_boxplot(outlier.alpha = 0, size = 0.75) +
+        geom_jitter(shape = 21, size = 3) +
+        theme_minimal() +
+        scale_fill_hue() +
+        labs(title = 'Controls | Tagg',
+             subtitle = paste('Z\': ', signif(Tm.z), sep = '')) +
+        control.theme
+      auc.plot <-
+        ggplot(controls.df, aes(x = ncgc_id, y = auc, fill = ncgc_id)) +
+        geom_boxplot(outlier.alpha = 0, size = 0.75) +
+        geom_jitter(shape = 21, size = 3) +
+        theme_minimal() +
+        scale_fill_hue() +
+        labs(title = 'Controls | AUC',
+             subtitle = paste('Z\': ', signif(auc.z), sep = '')) +
+        control.theme
+      right.grid <-
+        plot_grid(Tm.plot, auc.plot, ncol = 1)
+      control.grid <-
+        plot_grid(
+          control_thermogram(controlDF, ncTm = Tm.nc.mean, pcTm = Tm.pc.mean),
+          right.grid,
+          ncol = 2,
+          nrow = 1
+        )
+      out <- paste(outdir,'controls.png',sep="/")
+      # Name the plot explicitly: ggsave() otherwise saves whatever ggplot2
+      # last recorded, which need not be the assembled grid.
+      ggsave(out, plot = control.grid, dpi = 'retina', scale = 1.5)
+      return(control.grid)
+    }
+    if (output == 'df') {
+      means <-
+        c(Tm.nc.mean,
+          auc.nc.mean)
+      parameters <- c('Tm_fit', 'auc')
+      output.df <- tibble(parameters, means)
+      return(output.df)
+    }
+  }
+
+
+# Dose-response curve fit with LL.4 log-logistic fit
+# Fits resp ~ conc on df; the call runs inside try(), so a failing fit is
+# reported and handed back as a 'try-error' object instead of stopping.
+# The control object is built inside try() as well, as in the original call.
+# Notes on fitting options carried over from the original author:
+#   otrace = TRUE; output from optim method is displayed. Good for diagnostic
+#   trace = TRUE; trace from optim displayed
+#   robust = 'lms': doesn't handle outlier/noisy data well
+dr_fit <- function(df) {
+  try({
+    fit_control <- drmc(errorm = FALSE, maxIt = 10000, noMessage = TRUE)
+    drm(
+      resp ~ conc,
+      data = df,
+      type = 'continuous',
+      fct = LL.4(),
+      control = fit_control
+    )
+  })
+}
+
+# Deprecated for now...
+# Fit an LL.4 dose-response model per compound and per analysis column; returns one row per analysis column with five result columns per compound
+dr_analysis <-
+  function(df,
+           control = 'DMSO',
+           export_label = '',
+           plot = TRUE) {
+    # Construct df from the unique compound ids (less control) with empty analysis parameters
+    model_df <-
+      tibble(compound = (unique(filter(
+        df, ncgc_id != control
+      )$ncgc_id))) %>%
+      filter(compound != 'control')
+    for (i in 6:ncol(df)) {  # assumes the analysis columns start at column 6 of df - TODO confirm
+      col.nm <- colnames(df)[i]
+      model_df[, col.nm] <- NA
+    }
+
+    # Make a long df with the parameters (colnames above)
+    modelfit_df <- tibble(colnames(model_df)[2:ncol(model_df)])
+    names(modelfit_df)[1] <- 'analysis'
+
+    # Loop through each column in every row of modelfit_df and create a drm model for each and
+    # add statistics and readouts to a temp df that is bound to modelfit_df
+    for (i in 1:nrow(model_df)) {
+      # Create a working df with the raw data from compound[i]
+      temp_df <-
+        filter(df, df$ncgc_id == model_df$compound[(i)]) %>%
+        dplyr::select(-c('well', 'row', 'col'))
+      print(paste('Analyzing: ', model_df$compound[i]), sep = '')  # NOTE(review): sep = '' goes to print(), not paste()
+
+      # This temp df will hold the statistics that we read out from each model, and is reset every time.
+      # Values stored per analysis column (indices refer to temp.model$coefficients):
+      # ec50: coefficient 4 of the LL.4 fit
+      # (a curve-fit p-value is not stored, only the noEffect test below)
+      # noEffect: third element of the noEffect() test of the dose-response
+      # hill: coefficient 1 of the LL.4 fit
+      # (ec50 is listed once above; the mutate() below names it twice)
+      # lowerlim: coefficient 2 of the LL.4 fit
+      # upperlim: coefficient 3 of the LL.4 fit
+      temp_modelfit_df <- modelfit_df[1] %>%
+        mutate(
+          ec50 = 0,
+          noEffect = 0,
+          hill = 0,
+          lowerlim = 0,
+          upperlim = 0,
+          ec50 = 0  # NOTE(review): duplicate of the ec50 entry above
+        )
+      # Iterate through columns
+      for (n in 3:ncol(temp_df)) {
+        #Make df for drm model by selecting concentration and appropriate column
+        dr_df <- temp_df %>% dplyr::select(c(2, n))  # column 2 is taken as the concentration
+        colnames(dr_df)[1] <- 'conc'
+        colnames(dr_df)[2] <- 'resp'
+        temp.model <-
+          drm(
+            resp ~ conc,
+            data = dr_df,
+            fct = LL.4(),
+            control = drmc(
+              errorm = FALSE,
+              maxIt = 500,
+              noMessage = TRUE
+            )
+          )
+        # Construct fitted curve for plotting
+        pred.fit <-
+          expand.grid(pr.x = exp(seq(log(min(
+            dr_df[1]
+          )), log(max(
+            dr_df[1]
+          )), length = 1000)))  # 1000 points, evenly spaced on the log scale
+        # Only continue when the result has no `convergence` element - presumably that marks a failed fit; verify
+        if ("convergence" %in% names(temp.model) == FALSE) {
+          pm <-
+            predict(object = temp.model,
+                    newdata = pred.fit,
+                    interval = 'confidence')
+          pred.fit$p <- pm[, 1]
+          pred.fit$pmin <- pm[, 2]
+          pred.fit$pmax <- pm[, 3]
+          # Plot out dose response curve if conditional met in function
+          if (plot == TRUE) {
+            dr_plot <- ggplot(dr_df, aes(x = conc, y = resp)) +
+              geom_line(
+                data = pred.fit,
+                aes(x = pr.x, y = p),
+                size = 1.5,
+                color = 'black'
+              ) +
+              geom_point(
+                size = 4,
+                shape = 21,
+                fill = 'orange',
+                color = 'black'
+              ) +
+              scale_x_log10() +
+              theme_cowplot() +
+              labs(
+                title = paste(
+                  'Analysis of ',
+                  model_df$compound[i],
+                  ' by ',
+                  colnames(temp_df)[n],
+                  sep = ''
+                ),
+                subtitle = paste(
+                  'EC50: ',
+                  signif(temp.model$coefficients[4], 3),
+                  ' nM',
+                  '\n',
+                  'Significance of noEffect Test: ',
+                  signif(noEffect(temp.model)[3], 3),
+                  sep = ''
+                ),
+                x = 'Concentration'
+              )
+            print(dr_plot)
+
+            out <- paste(outdir,'dr_curves', export_label, sep="/")  # `outdir` is not a parameter; it is looked up outside this function
+            png(
+              filename = paste(
+                out,
+                '_',
+                model_df$compound[i],
+                colnames(temp_df)[n],
+                '.png',
+                sep = ''
+              ),
+              width = 3200,
+              height = 1800,
+              res = 300
+            )
+            print(dr_plot)
+            dev.off()
+          }
+          print(n)
+          # Extract fit parameters for the dr model (row n - 2 because the data columns start at 3)
+          temp_modelfit_df$ec50[(n - 2)] <-
+            signif(temp.model$coefficients[4], 3)
+          temp_modelfit_df$noEffect[(n - 2)] <-
+            signif(noEffect(temp.model)[3], 3)
+          temp_modelfit_df$hill[(n - 2)] <-
+            signif(temp.model$coefficients[1], 3)
+          temp_modelfit_df$lowerlim[(n - 2)] <-
+            signif(temp.model$coefficients[2], 3)
+          temp_modelfit_df$upperlim[(n - 2)] <-
+            signif(temp.model$coefficients[3], 3)
+        }
+      }
+      modelfit_df <- modelfit_df %>% cbind(., temp_modelfit_df[2:6])  # the five result columns of this compound
+      names(modelfit_df)[names(modelfit_df) == 'ec50'] <-  # suffix the generic names with the compound id
+        paste('ec50_', model_df$compound[i], sep = '')
+      names(modelfit_df)[names(modelfit_df) == 'noEffect'] <-
+        paste('noEffect_', model_df$compound[i], sep = '')
+      names(modelfit_df)[names(modelfit_df) == 'hill'] <-
+        paste('hill_', model_df$compound[i], sep = '')
+      names(modelfit_df)[names(modelfit_df) == 'lowerlim'] <-
+        paste('lowerlim_', model_df$compound[i], sep = '')
+      names(modelfit_df)[names(modelfit_df) == 'upperlim'] <-
+        paste('upperlim_', model_df$compound[i], sep = '')
+    }
+    return(modelfit_df)
+  }
+
+
+
+# Reshape the temperature columns of `df` into a long conc/resp table, fit a
+# dose-response model to it with dr_fit() and report the residual sum of
+# squares (RSS) of that fit.
+#
+# df:      data frame with one response column per temperature, named
+#          't_<temperature>' (e.g. 't_37'). Every row is used as-is.
+# control: currently unused; no filtering by compound happens here, so the
+#          caller decides which rows (presumably the DMSO wells) are passed in.
+#
+# Side effects: logs progress and the RSS via message() and draws the fitted
+# thermogram on the active graphics device.
+# Returns: the long table (conc = temperature, resp = response).
+dmso_rss <- function(df, control = 'DMSO') {
+  # ignore.case = FALSE keeps look-alike columns such as 'T_onset' out: their
+  # names cannot be converted to a temperature and would turn into NA.
+  df_rss <- df %>%
+    dplyr::select(starts_with("t_", ignore.case = FALSE)) %>%
+    pivot_longer(cols = everything())
+  colnames(df_rss)[1] <- 'conc'
+  colnames(df_rss)[2] <- 'resp'
+  # as.numeric rather than as.integer so fractional temperatures
+  # (e.g. 't_37.5') are not silently truncated.
+  df_rss$conc <- as.numeric(sub('^t_', '', df_rss$conc))
+  message('Fitting DMSO thermogram...')
+  rss_model <- dr_fit(df_rss)
+  rss_dmso <- sum(residuals(rss_model) ^ 2)
+  message('DMSO RSS: ', signif(rss_dmso, 6))
+  plot(
+    rss_model,
+    type = 'all',
+    cex = 0.5,
+    main = paste('DMSO Thermogram Fit\n', 'RSS: ', signif(rss_dmso, 5), sep =
+                   ''),
+    sub = paste('DMSO RSS: ', signif(rss_dmso, 5), sep = ''),
+    xlab = 'Temperature',
+    ylab = 'Fraction Unfolded',
+    ylim = c(-0.25, 1.25)
+  )
+  return(df_rss)
+}
+
+# Compare, for every non-control well, a shared ("null") thermogram fit against
+# separate ("alternate") fits for the well and for the DMSO reference.
+#
+# df:          data frame with ncgc_id, conc and 't_<temperature>' response
+#              columns; the MoltenProt fit columns listed below are dropped if
+#              present. Rows labelled 'DMSO' or 'ignore' are skipped.
+# dmso.rss.df: long conc/resp table of the DMSO reference (the shape returned
+#              by dmso_rss()).
+# plot:        when TRUE, draw each null-model fit on the active device.
+#
+# Returns: the non-temperature columns of the kept rows plus
+#   null.rss - RSS of one model fitted to DMSO + well data together,
+#   alt.rss  - RSS of the well-only fit plus RSS of the DMSO-only fit,
+#   rss.diff - null.rss - alt.rss.
+compare_models <- function(df, dmso.rss.df, plot = FALSE) {
+  temp_df <- df %>% dplyr::select(-one_of(
+    'Ea_fit',
+    'Tf_fit',
+    'kN_fit',
+    'bN_fit',
+    'kU_fit',
+    'bU_fit',
+    'S'
+  )) %>%
+    filter(ncgc_id != 'DMSO' & ncgc_id != 'ignore')
+  rss_df <- temp_df %>%
+    dplyr::select(-starts_with('t_'))
+  rss_df$null.rss <- NA
+  rss_df$alt.rss <- NA
+  rss_df$rss.diff <- NA
+
+  # The DMSO-only fit is the same for every well, so it is computed once.
+  dmso.model <- dr_fit(dmso.rss.df)
+  dmso.rss <- sum(residuals(dmso.model) ^ 2)
+
+  # seq_len() instead of 1:nrow(): with zero rows 1:0 would iterate over c(1, 0).
+  for (i in seq_len(nrow(temp_df))) {
+    # Long conc/resp table for this well. ignore.case = FALSE keeps columns
+    # such as 'T_onset' out, and as.numeric keeps fractional temperatures.
+    cmpnd.df <- temp_df[i, ] %>%
+      dplyr::select(starts_with('t_', ignore.case = FALSE)) %>%
+      pivot_longer(cols = everything())
+    colnames(cmpnd.df)[1] <- 'conc'
+    colnames(cmpnd.df)[2] <- 'resp'
+    cmpnd.df$conc <- as.numeric(sub('^t_', '', cmpnd.df$conc))
+
+    # Null model: one curve through the pooled DMSO + well data.
+    null.model <- bind_rows(dmso.rss.df, cmpnd.df)
+    null.drm <- dr_fit(null.model)
+    null.rss <- sum(residuals(null.drm) ^ 2)
+    rss_df$null.rss[i] <- null.rss
+    message(
+      'Null model for ',
+      temp_df$ncgc_id[i],
+      ' at concentration \' ',
+      temp_df$conc[i],
+      '\': ',
+      signif(null.rss, 6)
+    )
+    if (isTRUE(plot)) {
+      plot(null.drm,
+           type = 'all',
+           cex = 0.5,
+           main = 'Null model fit')
+    }
+
+    # Alternate model: separate curves, so the RSS values add up.
+    cmpnd.drm <- dr_fit(cmpnd.df)
+    cmpnd.rss <- sum(residuals(cmpnd.drm) ^ 2)
+    alt.rss <- sum(cmpnd.rss, dmso.rss)
+    rss_df$alt.rss[i] <- alt.rss
+    message (
+      'Alternate Model for ',
+      temp_df$ncgc_id[i],
+      ' at concentration \' ',
+      temp_df$conc[i],
+      ' \': ',
+      signif(alt.rss, 6)
+    )
+    rss.diff <- null.rss - alt.rss
+    message('RSS.0 - RSS.1: ', signif(rss.diff, 6))
+    rss_df$rss.diff[i] <- rss.diff
+  }
+  return(rss_df)
+}
+
+# For each standard melting parameter, add a '<param>_diff' column holding the
+# well's value minus the mean of the control wells.
+#
+# df:      data frame with an ncgc_id column and the numeric columns
+#          dHm_fit, Tm_fit, dG_std and T_onset.
+# control: ncgc_id label that identifies the control (vehicle) wells.
+#
+# Side effects: logs the control mean and standard deviation of every
+# parameter via message().
+# Returns: `df` with the four '<param>_diff' columns added.
+calculate_meltingparams <- function (df, control = 'vehicle') {
+  #Standard parameters to test:
+  test_params <- c('dHm_fit', 'Tm_fit', 'dG_std', 'T_onset')
+
+  is_control <- df$ncgc_id == control
+
+  for (current_param in test_params) {
+    control_values <- df[[current_param]][is_control]
+    mean_control <- mean(control_values)
+
+    # Vectorised subtraction: replaces a row-by-row loop that reused the outer
+    # loop index and appended a phantom row when `df` had no rows.
+    df[[paste(current_param, '_diff', sep = '')]] <-
+      df[[current_param]] - mean_control
+
+    #Print out mean and stdev of vehicle for each condition
+    std_control <- sd(control_values)
+    message('Vehicle mean for ',
+            current_param,
+            ': ',
+            mean_control,
+            ' (SD: ',
+            std_control,
+            ')')
+  }
+  return(df)
+}
+
+# Draw one scatter ("volcano") plot per analysis parameter: NPARC RSS
+# difference on x, the parameter's maximum difference from the control mean on
+# y, points filled by which significance test(s) the compound passes.
+#
+# df:   one row per compound with columns compound, rss.diff, mannwhit.pval,
+#       Tm_fit.maxDiff, Tm_fit.pval, auc.maxDiff and auc.pval.
+# save: accepted for interface compatibility.
+#       NOTE(review): this flag is not consulted; the PNG is always written.
+#       Callers in this tool pass save = FALSE and still get the files, so the
+#       behavior is kept - confirm before wiring the flag up.
+#
+# Side effects: prints every plot and writes '<outdir>/<param>_volcano.png'
+# (`outdir` is a global set by the calling script).
+plot_volcanos <- function(df, save = TRUE) {
+  # Effect-size column plotted on the y axis ...
+  test_params <-
+    c('Tm_fit.maxDiff',
+      'auc.maxDiff')
+  # ... and the p-value column that decides its significance. These used to be
+  # the maxDiff columns again, so effect sizes were compared against 0.05.
+  test_pval <-
+    c('Tm_fit.pval',
+      'auc.pval')
+
+  # Fill colours by significance outcome:
+  #   wine   #882255  significant by both tests
+  #   orange #EE7733  significant by NPARC (Mann-Whitney on RSS) only
+  #   teal   #009988  significant by the parameter test only
+  #   grey   #DDDDDD  significant in neither
+  fillvalues <-
+    c('Both', 'RSS NPARC', 'Parameter', 'Insignificant')
+  colors <- c('#882255', '#EE7733', '#009988', '#DDDDDD')
+
+  for (i in seq_along(test_params)) {
+    current_param <- test_params[i]
+    current_pval <- test_pval[i]
+    plot.df <- df %>%
+      dplyr::select(compound,
+                    rss.diff,
+                    mannwhit.pval,
+                    one_of(current_param),
+                    one_of(current_pval))
+
+    # [[ ]] returns a plain vector for data frames and tibbles alike.
+    nparc_sig <- plot.df$mannwhit.pval < 0.05
+    param_sig <- plot.df[[current_pval]] < 0.05
+
+    # Assign significance testing outcomes
+    plot.df$sigVal <-
+      case_when(
+        nparc_sig & param_sig ~ 'Both',
+        nparc_sig & !param_sig ~ 'RSS NPARC',
+        !nparc_sig & param_sig ~ 'Parameter',
+        !nparc_sig & !param_sig ~ 'Insignificant'
+      )
+
+    volcano_plot <-
+      ggplot(plot.df,
+             aes(x = rss.diff,
+                 y = plot.df[[current_param]],
+                 label = compound)) +
+      geom_point(shape = 21,
+                 aes(fill = sigVal),
+                 size = 5) +
+      theme_minimal() +
+      labs(
+        title = paste('Residual Variance vs. ', current_param, sep = ''),
+        y = paste(current_param, ' Experimental - Vehicle Mean', sep = ''),
+        x = 'RSS0 - RSS1 NPARC',
+        fill = 'Significance Detected'
+      ) +
+      scale_fill_manual(breaks = fillvalues, values = colors) +
+      theme(legend.position = 'bottom')
+    print(volcano_plot)
+    out <- paste(outdir,"/",current_param, sep="")
+    # Pass the plot explicitly instead of relying on "last plot printed".
+    ggsave(
+      paste(out, '_volcano.png', sep = ''),
+      plot = volcano_plot,
+      dpi = 'retina',
+      scale = 1.25
+    )
+  }
+}
+
+# Scatter plot of the NPARC RSS difference (x) against the log2 Mann-Whitney
+# p-value (y), with points filled by the NPARC EC50.
+#
+# df:       one row per compound with columns compound, rss.diff,
+#           mannwhit.pval and mannwhit.ec50.
+# savePlot: when TRUE, also write '<outdir>/rssPvalcomp.png' (`outdir` is a
+#           global set by the calling script).
+#
+# Side effects: always prints the plot.
+rss.pval.plot <- function (df, savePlot = FALSE) {
+  plot.df <- dplyr::select(df, compound, rss.diff, mannwhit.pval, mannwhit.ec50)
+  plot.df <- mutate(plot.df, mannwhit.pval = log2(mannwhit.pval))
+
+  # Assemble the plot layer by layer.
+  rss.plot <- ggplot(
+    plot.df,
+    aes(x = rss.diff, y = mannwhit.pval, fill = mannwhit.ec50)
+  )
+  rss.plot <- rss.plot + geom_point(shape = 21, size = 3.5)
+  rss.plot <- rss.plot + theme_minimal()
+  rss.plot <- rss.plot + scale_fill_gradient(
+    low = '#EE3377',
+    high = '#88CCEE',
+    na.value = 'grey20'
+  )
+  rss.plot <- rss.plot + labs(
+    title = 'RSS vs. Mann Whitney P-val',
+    x = 'RSS0 - RSS1',
+    y = 'Log2 Mann Whitney P-val',
+    fill = 'NPARC EC50'
+  )
+
+  print(rss.plot)
+  if (savePlot == TRUE) {
+    # ggsave() without `plot` saves the plot that was just printed.
+    ggsave(paste(outdir, '/rssPvalcomp.png', sep = ""),
+           dpi = 'retina',
+           scale = 1.25)
+  }
+}
+
+# Unfinished stub for a per-parameter dose-response analysis.
+# It builds a one-column tibble of the unique ncgc_id values that differ from
+# `control` and then loops over its rows with an empty body. Nothing is
+# computed, plotted or returned (`plot` is never read), so callers get NULL.
+parameter_doseresponse <-
+  function(df, control = 'vehicle', plot = TRUE) {
+    # Unique non-control compound ids; no control mean is computed yet.
+    parameter_df <-
+      tibble(compound = (unique(filter(
+        df, ncgc_id != control
+      )$ncgc_id)))
+    # Placeholder loop: the per-compound work has not been written.
+    for (i in 1:nrow(parameter_df)) {
+
+    }
+  }
+
+# Add a '<param>_zscore' column: each well's value expressed in control
+# standard deviations away from the control mean.
+#
+# df:      data frame with an ncgc_id column and a numeric Tm_fit column.
+# control: ncgc_id label that identifies the control (vehicle) wells.
+# plot:    unused; kept for interface compatibility.
+#
+# Returns: `df` with the column Tm_fit_zscore added.
+calculate_zscore <-
+  function(df, control = 'vehicle', plot = FALSE) {
+    test_params <-
+      c('Tm_fit')
+    is_control <- df$ncgc_id == control
+    for (current_param in test_params) {
+      control_values <- df[[current_param]][is_control]
+      mean_control <- mean(control_values)
+      std_control <- sd(control_values)
+      # Vectorised: replaces a row-by-row loop that reused the outer loop
+      # index and appended a phantom row when `df` had no rows.
+      df[[paste(current_param, '_zscore', sep = '')]] <-
+        (df[[current_param]] - mean_control) / std_control
+    }
+    return(df)
+  }
+
+# Add a '<param>_pval' column with the two-sided standard-normal p-value of
+# each z-score: p = 2 * pnorm(-|z|).
+#
+# df:      data frame with a numeric Tm_fit_zscore column (as added by
+#          calculate_zscore()).
+# control: unused; kept for interface compatibility.
+# plot:    unused; kept for interface compatibility.
+#
+# Returns: `df` with the column Tm_fit_zscore_pval added.
+convert_zscore <- function(df, control = 'vehicle', plot = FALSE) {
+  test_params <-
+    c('Tm_fit_zscore')
+  for (current_param in test_params) {
+    # Vectorised over all rows; the control mean/SD that used to be computed
+    # here were never used and have been dropped.
+    df[[paste(current_param, '_pval', sep = '')]] <-
+      2 * pnorm(-abs(df[[current_param]]))
+  }
+  return(df)
+}
+
+# Fit the null model (a constant: resp ~ 1) to a set of conc ~ resp values.
+#
+# df:         data frame with a numeric `resp` column; `conc` is only needed
+#             when plotting.
+# plot.model: when TRUE, write a diagnostic JPEG of the data and the fitted
+#             line. Defaults to FALSE.
+# graphTitle: plot title, also used to build the JPEG file name.
+#
+# Returns: residual sum of squares of the null model.
+fit_nullmodel <- function(df,
+                          plot.model = FALSE,
+                          graphTitle = '') {
+  null.model <- lm(resp ~ 1, data = df)
+  #Plot if TRUE. For diagnostic use mostly...
+  if (isTRUE(plot.model)) {
+    # NOTE(review): there is no '/' between 'models' and the title, so the file
+    # is written as '<outdir>/models<title>.jpg' - confirm this is intended.
+    out <- paste(outdir,'models', sep="/")
+    opened <- try(jpeg(filename = paste(out, graphTitle, '.jpg', sep =
+                                          '')))
+    # Only draw and close when our own device was opened. Otherwise plot() and
+    # dev.off() would act on - and close - whichever device was already active.
+    if (!inherits(opened, 'try-error')) {
+      try(plot(
+        df$conc,
+        df$resp,
+        main = graphTitle,
+        pch = 21,
+        cex = 3,
+        col = 'black',
+        bg = 'orange',
+        lwd = 3
+      ))
+      try(abline(null.model, col = 'black', lwd = 3))
+      try(dev.off())
+    }
+  }
+  #Return squared residuals for null model
+  return(sum(residuals(null.model) ^ 2))
+}
+
+# Fit the alternate model (the dose-response curve produced by dr_fit()) to a
+# set of conc ~ resp values.
+#
+# df:         data frame in the shape dr_fit() expects (conc and resp columns).
+# plot.model: when TRUE, write a diagnostic JPEG of the fitted model.
+#             Defaults to FALSE.
+# graphTitle: plot title, also used to build the JPEG file name.
+#
+# Returns: residual sum of squares of the alternate model.
+fit_altmodel <- function(df,
+                         plot.model = FALSE,
+                         graphTitle = '') {
+  alt.model <- dr_fit(df)
+  if (isTRUE(plot.model)) {
+    # NOTE(review): there is no '/' between 'models' and the title, so the file
+    # is written as '<outdir>/models<title>.jpg' - confirm this is intended.
+    out <- paste(outdir,'models', sep="/")
+    opened <- try(jpeg(filename = paste(out, graphTitle, '.jpg', sep =
+                                          '')))
+    # Only draw and close when our own device was opened. Otherwise plot() and
+    # dev.off() would act on - and close - whichever device was already active.
+    if (!inherits(opened, 'try-error')) {
+      try(plot(
+        alt.model,
+        main = graphTitle,
+        pch = 21,
+        cex = 3,
+        col = 'black',
+        bg = 'cyan',
+        lwd = 3
+      ))
+      try(dev.off())
+    }
+  }
+  return(sum(residuals(alt.model) ^ 2))
+}
+
+# Derive RSS values for the null and alternate model for each compound.
+#
+# For every compound and every temperature column, the responses across
+# concentrations are fitted with a constant (null) and with dr_fit()
+# (alternate). The per-temperature RSS values of both models are summarised and
+# compared with a Mann-Whitney U test. When that test is significant
+# (p <= 0.05), an EC50 is taken from a dose-response fit at the temperature
+# with the largest RSS difference.
+#
+# df:        data frame with ncgc_id, conc and 't_<temperature>' columns.
+# control:   ncgc_id label of the control compound; excluded together with
+#            'vehicle' and 'control'.
+# plotModel: forwarded to fit_nullmodel()/fit_altmodel() as plot.model.
+# rssPlot:   when TRUE, print and save an RSS-difference-vs-temperature plot
+#            per compound ('<outdir>/models/<compound>_rss.png').
+# drPlot:    when TRUE, call dr.thermogram() for every compound.
+#
+# Returns: tibble with one row per compound and the columns null.model.n,
+# alt.model.n, null.model.sum, alt.model.sum, null.model.sd, alt.model.sd,
+# rss.diff, mannwhit.pval and mannwhit.ec50.
+compute.rss.models <-
+  function(df,
+           control = 'DMSO',
+           plotModel = TRUE,
+           rssPlot = TRUE,
+           drPlot = TRUE) {
+    #Construct tibble of unique compounds names.
+    # The two exclusions must be combined with '&': with '|' the condition is
+    # true for every row and the control compound was never removed.
+    rss.df <- tibble(compound = (unique(
+      filter(df, ncgc_id != control & ncgc_id != 'vehicle')$ncgc_id
+    ))) %>%
+      filter(compound != 'control') %>%
+      filter(compound != 'vehicle')
+    # NA_real_ keeps every result column numeric even when it is never filled.
+    rss.df$null.model.n <- NA_real_
+    rss.df$alt.model.n <- NA_real_
+    rss.df$null.model.sum <- NA_real_
+    rss.df$alt.model.sum <- NA_real_
+    rss.df$null.model.sd <- NA_real_
+    rss.df$alt.model.sd <- NA_real_
+    rss.df$rss.diff <- NA_real_
+    rss.df$mannwhit.pval <- NA_real_
+    rss.df$mannwhit.ec50 <- NA_real_
+
+    # seq_len() so that an empty compound list skips the loop entirely.
+    for (i in seq_len(nrow(rss.df))) {
+      cmpnd.label <- as.character(rss.df$compound[i])
+
+      #Construct df for current compound
+      fit.df <- df %>% filter(ncgc_id == cmpnd.label) %>%
+        dplyr::select(ncgc_id, conc, starts_with('t_')) %>%
+        dplyr::select(!contains('onset'))
+
+      #Plot out dose-response thermogram
+      if (isTRUE(drPlot)) {
+        dr.thermogram(fit.df, target = rss.df$compound[i])
+      }
+
+      #Construct a df to hold the rss values until final calculations of mean,sd,N
+      temp.cols <- colnames(dplyr::select(fit.df, starts_with('t_')))
+      cmpnd.fit.df <- tibble(temp = temp.cols)
+      cmpnd.fit.df$null <- NA_real_
+      cmpnd.fit.df$alt <- NA_real_
+
+      #Iterate through each temperature, construct df, perform rss analysis, and add to cmpnd.fit.df
+      for (k in seq_along(temp.cols)) {
+        current.fit.df <- fit.df %>%
+          dplyr::select(1:2, all_of(temp.cols[k]))
+        colnames(current.fit.df)[3] <- 'resp'
+        cmpnd.fit.df$null[k] <-
+          fit_nullmodel(current.fit.df,
+                        plot.model = plotModel,
+                        graphTitle = as.character(paste(
+                          cmpnd.label, ' Null Model at ', temp.cols[k], sep = ''
+                        )))
+        cmpnd.fit.df$alt[k] <-
+          fit_altmodel(current.fit.df,
+                       plot.model = plotModel,
+                       graphTitle = as.character(paste(
+                         cmpnd.label, ' Alternate Model at ', temp.cols[k], sep = ''
+                       )))
+      }
+      # RSS0-RSS1
+      cmpnd.fit.df <- cmpnd.fit.df %>%
+        mutate(diff = null - alt)
+      #Now, we calculate and assign rss values for both models in the rss.df for this compound.
+      rss.df$null.model.n[i] <- length(na.omit(cmpnd.fit.df$null))
+      rss.df$alt.model.n[i] <- length(na.omit(cmpnd.fit.df$alt))
+      rss.df$null.model.sum[i] <- sum(cmpnd.fit.df$null)
+      rss.df$alt.model.sum[i] <- sum(cmpnd.fit.df$alt)
+      rss.df$null.model.sd[i] <- sd(cmpnd.fit.df$null)
+      rss.df$alt.model.sd[i] <- sd(cmpnd.fit.df$alt)
+      rss.df$rss.diff[i] <-
+        sum(cmpnd.fit.df$null) - sum(cmpnd.fit.df$alt)
+
+      #Perform Mann-Whitney U test on alternative vs. null model RSS values for compound.
+      mann.whit <-
+        wilcox.test(x = cmpnd.fit.df$null,
+                    y = cmpnd.fit.df$alt,
+                    exact = TRUE)
+      rss.df$mannwhit.pval[i] <- mann.whit$p.value
+
+      #Message out RSS0-RSS1 and p value
+      message('RSS Difference for ',
+              cmpnd.label,
+              ': ',
+              rss.df$rss.diff[i])
+      message('Mann-Whitney U Test p-val: ',
+              rss.df$mannwhit.pval[i])
+
+      # Construct drc model and derive ec50 if p-val is significant
+      if (!is.na(rss.df$mannwhit.pval[i]) &&
+          rss.df$mannwhit.pval[i] <= 0.05) {
+        #Find what temperature is the max point. which.max() yields a single,
+        # NA-safe index; '== max(...)' returned several columns on ties.
+        rss.max.temp <- cmpnd.fit.df$temp[which.max(cmpnd.fit.df$diff)]
+        if (length(rss.max.temp) == 1) {
+          #Construct df ready for drc at max temperature
+          rss.drc.df <- fit.df %>%
+            dplyr::select(conc, one_of(rss.max.temp))
+          colnames(rss.drc.df)[2] <- 'resp'
+          rss.drc.model <- dr_fit(rss.drc.df)
+          ec50.temp <- rss.drc.model$coefficients[4]
+          if (length(ec50.temp) != 0) {
+            rss.df$mannwhit.ec50[i] <- signif(ec50.temp, 3)
+          }
+        }
+      }
+
+      #Plot the RSS values across the temperature range if true
+      if (isTRUE(rssPlot)) {
+        # First, clean up the temperatures
+        cmpnd.fit.df$temp <- as.numeric(sub('t_', '', cmpnd.fit.df$temp))
+
+        # `sep = ''` belongs to paste(); it used to be passed to labs().
+        rss.plot <- ggplot(cmpnd.fit.df, aes(x = temp, y = diff)) +
+          geom_point(shape = 21,
+                     size = 4,
+                     fill = '#AA4499') +
+          theme_minimal() +
+          labs(
+            title = paste(cmpnd.label, ' RSS Difference', sep = ''),
+            subtitle = paste('Mann-Whitney U pval: ',
+                             signif(rss.df$mannwhit.pval[i]),
+                             sep = ''),
+            y = 'RSS0-RSS1',
+            x = 'Temperature [C]'
+          )
+        print(rss.plot)
+        out <- paste(outdir,'models', cmpnd.label, sep="/")
+        ggsave(
+          filename = paste(out, '_rss.png', sep =''),
+          plot = rss.plot,
+          scale = 1.25,
+          dpi = 'retina'
+        )
+      }
+    }
+    return(rss.df)
+  }
+
+# Fit a dose-response model to each thermogram parameter (Tm_fit, auc) of
+# every compound and collect EC50, no-effect p-value and maximum difference
+# from the control mean.
+#
+# df:        data frame with ncgc_id, conc, Tm_fit and auc columns. Rows
+#            labelled 'control' or 'vehicle' are not analysed as compounds.
+# plotModel: when TRUE, write '<outdir>/models/<compound>_<param>.png' for
+#            every fitted model.
+#
+# Returns: tibble with one row per compound and, for each parameter, the
+# columns '<param>.ec50', '<param>.pval' and '<param>.maxDiff'. Entries stay NA
+# when dr_fit() returned a plain list instead of a model.
+compute_parameter.rssmodel <- function(df, plotModel = FALSE) {
+  #Test parameters for standard model
+  test_params <-
+    c('Tm_fit',
+      'auc')
+
+  #Construct df of unique compounds and initialize parameter readouts.
+  param.rss.df <- tibble(compound = (unique(
+    filter(df, ncgc_id != 'control' & ncgc_id != 'vehicle')$ncgc_id
+  )))
+  param.rss.df$Tm_fit.ec50 <- as.numeric(NA)
+  param.rss.df$Tm_fit.pval <- as.numeric(NA)
+  param.rss.df$Tm_fit.maxDiff <- as.numeric(NA)
+  param.rss.df$auc.ec50 <- as.numeric(NA)
+  param.rss.df$auc.pval <- as.numeric(NA)
+  param.rss.df$auc.maxDiff <- as.numeric(NA)
+
+  control.means <- control_analysis(df, output = 'df')
+
+  for (i in seq_len(nrow(param.rss.df))) {
+    cmpnd <- param.rss.df$compound[i]
+    cmpnd.fit.df <- df %>%
+      filter(ncgc_id == cmpnd)
+    #Now iterate through columns in test_params
+    for (current_param in test_params) {
+      pval.col <- paste(current_param, '.pval', sep = '')
+      ec50.col <- paste(current_param, '.ec50', sep = '')
+      maxdiff.col <- paste(current_param, '.maxDiff', sep = '')
+
+      current.fit.df <- cmpnd.fit.df %>%
+        dplyr::select(ncgc_id, conc, all_of(current_param))
+      colnames(current.fit.df)[3] <- 'resp'
+      current.model <- dr_fit(current.fit.df)
+
+      #Workaround to avoid drm that can't converge: such fits come back as a
+      # plain list. inherits() is safe for objects with several classes, where
+      # 'class(x) != "list"' yields a vector condition.
+      if (inherits(current.model, 'list')) {
+        next
+      }
+
+      param.rss.df[[pval.col]][i] <- noEffect(current.model)[3]
+      param.rss.df[[ec50.col]][i] <-
+        summary(current.model)$coefficients[4]
+
+      #Calculate the maximum difference in param and subtract negative control mean from it.
+      control.mean <-
+        control.means$means[control.means$parameters == current_param]
+      current.fit.df$absDiff <- abs(current.fit.df$resp - control.mean)
+      # which.max() picks a single row even when several share the maximum.
+      max.diff <-
+        current.fit.df$resp[which.max(current.fit.df$absDiff)] - control.mean
+      if (length(max.diff) == 1) {
+        param.rss.df[[maxdiff.col]][i] <- max.diff
+      }
+
+      message('Analyzing Compound ', cmpnd, '...')
+      message(current_param)
+      message('EC50: ', param.rss.df[[ec50.col]][i])
+      # The digit count belongs to signif(); it used to be passed to message(),
+      # which appended a literal "1" to the printed p-value.
+      message('No Effect ANOVA p-val: ',
+              signif(param.rss.df[[pval.col]][i], 1))
+
+      if (isTRUE(plotModel)) {
+        out <- paste(outdir,'models', cmpnd, sep="/")
+        png(
+          filename = paste(
+            out,
+            '_',
+            current_param,
+            '.png',
+            sep = ''
+          ),
+          bg = 'transparent'
+        )
+        # Close the device even when plotting fails.
+        tryCatch(
+          plot(
+            current.model,
+            main = paste(
+              cmpnd,
+              '\n',
+              ' NoEffect pval: ',
+              signif(param.rss.df[[pval.col]][i]),
+              '\n',
+              'EC50: ',
+              signif(param.rss.df[[ec50.col]][i]),
+              '\n',
+              current_param
+            )
+          ),
+          finally = dev.off()
+        )
+      }
+    }
+  }
+  return(param.rss.df)
+}
+
+# Creates a thermogram (response vs. temperature) with one line per
+# concentration of a single compound, prints it and saves it as a PNG.
+#
+# df:     data frame already restricted to the compound of interest, with the
+#         compound id in column 1, `conc` in column 2 and the
+#         't_<temperature>' response columns from column 3 onwards.
+# target: compound label used in the title and the file name
+#         ('<outdir>/models/dr_<target>.png'; `outdir` is a global set by the
+#         calling script).
+#
+# Returns: the ggplot object.
+dr.thermogram <- function(df, target = '') {
+  # Long format: one row per (well, temperature).
+  df <- df %>%
+    pivot_longer(cols = 3:ncol(df),
+                 names_to = 'temp',
+                 values_to = 'resp')
+  df$temp <- as.numeric(sub('t_', '', df$temp))
+
+  # `group` is the aesthetic that makes geom_line() draw one line per
+  # concentration; the original passed dplyr's group_by() as an unnamed
+  # aesthetic, which is not a grouping instruction for ggplot.
+  # NOTE(review): scale_color_viridis_d() has no colour aesthetic to act on
+  # here (only `fill` is mapped) - scale_fill_viridis_d() may have been meant.
+  dr.plot <- ggplot(df, aes(
+    y = resp,
+    x = temp,
+    fill = as.factor(signif(conc)),
+    group = signif(conc)
+  )) +
+    geom_line(color = 'black',
+              alpha = 0.8,
+              size = 1) +
+    geom_point(shape = 21, size = 3) +
+    theme_minimal() +
+    scale_color_viridis_d() +
+    labs(
+      title = paste('Dose-Response Thermogram for ', target, sep = ''),
+      x = 'Temperature [C]',
+      y = 'Response',
+      fill = 'Concentration'
+    )
+  print(dr.plot)
+  out <- paste(outdir,'models', 'dr_', sep="/")
+  ggsave(
+    filename = paste(out, target, '.png', sep = ''),
+    plot = dr.plot,
+    scale = 1.25,
+    dpi = 'retina'
+  )
+  return(dr.plot)
+}
+
+# Build heatmaps of EC50 and p-values across analysis parameters
+# (compound on y, analysis parameter on x).
+#
+# df:       one row per compound with a `compound` column plus every column
+#           whose name contains 'ec50' or 'pval'.
+# plotHeat: when TRUE, print both heatmaps and save them as
+#           '<outdir>/pval_heatmap.png' and '<outdir>/ec50_heatmap.png'
+#           (`outdir` is a global set by the calling script). When FALSE the
+#           plots are built but neither shown nor saved.
+parameter_heatmaps <- function(df, plotHeat = FALSE) {
+  ec50.heat.df <- df %>%
+    dplyr::select(compound, contains('ec50')) %>%
+    pivot_longer(cols = !compound,
+                 names_to = 'parameter',
+                 values_to = 'ec50') %>%
+    mutate(ec50 = log10(ec50))
+  # fixed = TRUE: '.' is meant literally, not as "any character".
+  ec50.heat.df$parameter <-
+    sub('.ec50', '', ec50.heat.df$parameter, fixed = TRUE)
+
+  ec50.heat.plot <-
+    ggplot(ec50.heat.df,
+           aes(
+             x = parameter,
+             y = compound,
+             fill = ec50,
+             label = signif(ec50)
+           )) +
+    geom_tile(color = 'black') +
+    geom_text(alpha = 0.85, size = 2.5) +
+    theme_minimal() +
+    scale_fill_gradientn(colors = c('#EE3377', '#DDCC77', '#88CCEE')) +
+    labs(title = 'EC50 Parameter Comparison',
+         fill = 'Log EC50') +
+    theme(
+      axis.title.y = element_blank(),
+      axis.title.x = element_blank(),
+      axis.text.x = element_text(size = 12, face = 'bold')
+    )
+
+  # Bonferroni threshold: 0.05 divided by the number of compounds.
+  # length(unique(df)) on a data frame counted its columns instead.
+  n.compounds <- length(unique(df$compound))
+  pval.heat.df <- df %>%
+    dplyr::select(compound, contains('pval')) %>%
+    pivot_longer(cols = !compound,
+                 names_to = 'parameter',
+                 values_to = 'pval') %>%
+    mutate(sigVal = ifelse(pval < (0.05 / n.compounds),
+                           'Significant', 'Insignificant'))
+  pval.heat.df$parameter <-
+    sub('.pval', '', pval.heat.df$parameter, fixed = TRUE)
+  pval.heat.plot <-
+    ggplot(pval.heat.df,
+           aes(
+             x = parameter,
+             y = compound,
+             fill = sigVal,
+             label = signif(pval)
+           )) +
+    geom_tile(color = 'black') +
+    geom_text(alpha = 0.85, size = 2.5) +
+    theme_minimal() +
+    labs(title = 'P-Value Parameter Comparison',
+         fill = 'P-Value') +
+    theme(
+      axis.title.y = element_blank(),
+      axis.title.x = element_blank(),
+      axis.text.x = element_text(size = 12, face = 'bold')
+    )
+  if (isTRUE(plotHeat)) {
+    # Each plot is handed to ggsave() explicitly rather than via "last plot".
+    print(pval.heat.plot)
+    out <- paste(outdir, 'pval_heatmap.png' , sep="/")
+    ggsave(out,
+           plot = pval.heat.plot,
+           dpi = 'retina',
+           scale = 1.25)
+    print(ec50.heat.plot)
+    out <- paste(outdir, 'ec50_heatmap.png' , sep="/")
+    ggsave(out,
+           plot = ec50.heat.plot,
+           dpi = 'retina',
+           scale = 1.25)
+  }
+}
+
+# Adds a binary significance flag for each analysis method.
+# For every column whose name contains 'pval', a '<...>pval.sig' column is
+# added:
+#   1 if the p-value is below the Bonferroni threshold alpha / nrow(df),
+#   0 otherwise, including when the p-value is missing.
+#
+# df:    data frame with one row per compound.
+# alpha: family-wise significance level.
+#
+# Returns: `df` with the flag columns added. As before, every remaining NA in
+# the data frame is replaced by 0 once at least one p-value column exists.
+# NOTE(review): that fill also turns missing p-values and EC50s into 0 in the
+# returned data; downstream code currently relies on NA-free columns - confirm
+# before narrowing it.
+determineSig <- function(df, alpha = 0.05) {
+  # Same columns as dplyr::select(df, contains('pval')): literal,
+  # case-insensitive match.
+  analysisMethods <-
+    colnames(df)[grepl('pval', colnames(df), ignore.case = TRUE)]
+  analysisMethodsNames <- sub('pval', 'pval.sig', analysisMethods)
+  sigVal <- alpha / nrow(df)
+
+  # Derive every flag from the untouched p-values first. The NA -> 0 fill used
+  # to run inside this loop, so missing p-values in later columns had already
+  # become 0 and were flagged as significant.
+  sigFlags <- lapply(analysisMethods, function(method) {
+    flag <- ifelse(df[[method]] < sigVal, 1, 0)
+    flag[is.na(flag)] <- 0
+    flag
+  })
+  for (i in seq_along(analysisMethods)) {
+    df[[analysisMethodsNames[i]]] <- sigFlags[[i]]
+  }
+  if (length(analysisMethods) > 0) {
+    df[is.na(df)] <- 0
+  }
+  return(df)
+}
+
+# Rank the significant compounds of each analysis method by EC50.
+# Use after determineSig(): for every '<method>.pval.sig' column, the rows
+# flagged 1 are ranked by '<method>.ec50' and the rank is stored in
+# '<method>.rankOrder'. Rows that are not significant get NA.
+#
+# df: data frame with the '.pval.sig' flag columns and matching '.ec50'
+#     columns.
+#
+# Returns: `df` with one '.rankOrder' column per method; unchanged when there
+# are no '.pval.sig' columns.
+rankOrder <- function(df) {
+  methodSig <- colnames(dplyr::select(df, contains('pval.sig')))
+  # fixed = TRUE: the dots in these patterns are literal.
+  methodRank <- sub('pval.sig', 'rankOrder', methodSig, fixed = TRUE)
+  methods <- sub('.rankOrder', '', methodRank, fixed = TRUE)
+  methodsEC <- paste(methods, '.ec50', sep = '')
+
+  # seq_along() so that "no flag columns" skips the loop instead of indexing
+  # with NA.
+  for (i in seq_along(methods)) {
+    rank.df <- filter(df, df[[methodSig[i]]] == 1)
+    rank.df[[methodRank[i]]] <-
+      as.integer(rank(rank.df[[methodsEC[i]]]))
+    # Join on every shared column, which is what the implicit natural join
+    # did, but stated explicitly.
+    df <- left_join(df, rank.df,
+                    by = intersect(colnames(df), colnames(rank.df)))
+  }
+  return(df)
+}
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R
new file mode 100644
index 0000000..118d0cf
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R
@@ -0,0 +1,59 @@
+suppressWarnings(library(argparse))
+suppressWarnings(library(logging))
+
+
+# Initialize the logger
+basicConfig()
+
+# Setup the argument parsing
+# Register the --params option (path to the fit-params csv file) on `parser`.
+# Called for its side effect; returns NULL invisibly.
+addFitParams <- function(parser) {
+  # A leftover debug print("addFitParams") wrote to stdout on every run and
+  # has been removed.
+  parser$add_argument("--params", type = "character",help="Fit params csv file")
+  invisible(NULL)
+}
+# Register the --values option (path to the baseline-corrected csv file) on
+# `parser`. Called for its side effect; returns NULL invisibly.
+addBaselineCorrected <- function(parser) {
+  parser$add_argument(
+    "--values",
+    type = "character",
+    help = "Baseline corrected csv file"
+  )
+  invisible(NULL)
+}
+# Register the --platemap option (path to the platemap excel file) on
+# `parser`. Called for its side effect; returns NULL invisibly.
+addPlateMap <- function(parser) {
+  parser$add_argument(
+    "--platemap",
+    type = "character",
+    help = "platemap excel file"
+  )
+  invisible(NULL)
+}
+# Register the --outdir option on `parser`. The value is used as a directory:
+# the analysis scripts build every output path as paste(outdir, <file>, sep = "/").
+# Called for its side effect; returns NULL invisibly.
+addOutputArgs <- function(parser) {
+  # The help text used to read "Output csv file", which misdescribed the option.
+  parser$add_argument("--outdir", type = "character",help="Output directory")
+  invisible(NULL)
+}
+# Build the command-line parser and register every option of the tool on it,
+# in this order: --params, --values, --platemap, --outdir.
+# Returns the configured parser.
+getAllParser <- function() {
+  parser <- ArgumentParser(description="ALL PARSER")
+  registrars <- list(addFitParams,
+                     addBaselineCorrected,
+                     addPlateMap,
+                     addOutputArgs)
+  for (register in registrars) {
+    register(parser)
+  }
+  parser
+}
+
+# Parse the command-line arguments into `args`.
+parser <- getAllParser()
+args <- parser$parse_args()
+
+
+# Copy each argument into a global variable and log it. These globals
+# (params, values, plate_map, outdir) are not passed on explicitly; the
+# scripts sourced at the end of this file read them by name.
+params <- args$params
+loginfo('params = %s', params)
+
+values <- args$values
+loginfo('values = %s', values)
+
+plate_map <- args$platemap
+loginfo('platemap = %s', plate_map)
+
+outdir <- args$outdir
+loginfo('outdir = %s', outdir)
+
+# Second, more descriptive log line for each of the four inputs.
+loginfo('params (fit params) = %s', params)
+loginfo('values (baseline corrected): %s', values)
+loginfo('platemap file (plate metadata): %s', plate_map)
+loginfo('outdir (output directory): %s', outdir)
+
+# Run the pipeline. The relative paths are resolved against the current
+# working directory, so this script has to be started from the directory that
+# contains prepare_data.R and main_analysis.R.
+source('./prepare_data.R')
+source('./main_analysis.R')
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R
new file mode 100644
index 0000000..53ef628
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R
@@ -0,0 +1,92 @@
+# #  @@@@@@@   @@@@@@@              @@@@@@@  @@@@@@@@  @@@@@@@   @@@@@@    @@@@@@
+# #  @@@@@@@@  @@@@@@@             @@@@@@@@  @@@@@@@@  @@@@@@@  @@@@@@@   @@@@@@@@
+# #  @@!  @@@    @@!               !@@       @@!         @@!    !@@       @@!  @@@
+# #  !@!  @!@    !@!               !@!       !@!         !@!    !@!       !@!  @!@
+# #  @!@!!@!     @!!    @!@!@!@!@  !@!       @!!!:!      @!!    !!@@!!    @!@!@!@!
+# #  !!@!@!      !!!    !!!@!@!!!  !!!       !!!!!:      !!!     !!@!!!   !!!@!!!!
+# #  !!: :!!     !!:               :!!       !!:         !!:         !:!  !!:  !!!
+# #  :!:  !:!    :!:               :!:       :!:         :!:        !:!   :!:  !:!
+# #  ::   :::     ::                ::: :::   :: ::::     ::    :::: ::   ::   :::
+# # NonParametric Multiparameter Analysis of CETSA/RT-CETSA Experimental Sets
+# #
+# # Written by: Michael Ronzetti {NIH/NCATS/UMD}
+# # Patents: PCT/US21/45184, HHS E-022-2022-0-US-01
+# # Main Analysis
+
+library(tidyverse)
+library(readxl)
+library(stringr)
+library(drc)
+library(ggthemes)
+library(cowplot)
+library(hrbrthemes)
+library(ggpubr)
+library(MESS)
+library(devtools)
+
+# Open a PDF device with no output file so that the plot()/print() calls below
+# have a device to draw on without writing a file.
+# NOTE(review): the original comment only said "BECAUSE OF BUG"; which bug
+# this works around is not stated. The device is never closed in this script.
+pdf(file = NULL)
+
+
+# EXPERIMENTAL PARAMETERS AND SETUP
+#
+# Input experiment parameters here
+# These are plain globals; nothing in this script passes startTemp, endTemp or
+# plate_format on explicitly (presumably read by sourced code - verify).
+
+startTemp <- 37
+endTemp <- 95
+plate_format <- 384
+control <- 'vehicle'
+pc <- 'control'
+
+# read in preprocessed data
+# NOTE(review): output_path is only logged; nothing is read from it here.
+# full_df is not created in this script and must already exist when this file
+# is sourced (presumably built by prepare_data.R, which main.R sources first).
+merge_molten_filename <- "test_merged_molten_final.csv"
+output_path <- paste(outdir, merge_molten_filename, sep="/")
+loginfo('reading preprocessed data from : %s', output_path)
+# Load the analysis functions used below.
+source('./functions.R')
+full_df <- calculate_auc(full_df)
+
+# Perform some preliminary control group analysis of variability
+control_df <-
+  control_grouping(full_df, control, pc) # Pull out control compound datapoints
+control_var <-
+  control_variability(control_df) # Read out the control group variability
+controlPlot <-
+  control_analysis(
+    full_df,
+    nc = 'vehicle',
+    pc = 'control',
+    output = 'plot',
+    controlDF = control_var
+  )
+print(controlPlot)
+
+# Disabled step: per-well melting parameter differences, z-scores and p-values.
+#Calculate melting parameter difference for each well from MoltenProt
+# full_df <- calculate_meltingparams(full_df) %>%
+#   calculate_zscore() %>%
+#   convert_zscore
+
+#Derive RSS values for null and alternate model for each compound from full_df
+rss <- compute.rss.models(full_df, rssPlot = FALSE, drPlot = FALSE, plotModel = FALSE)
+
+#Perform dose-response for each thermogram parameter
+parameters <- compute_parameter.rssmodel(full_df, plotModel = FALSE)
+
+# Merge the two per-compound result tables for further analysis.
+signif.df <- merge(rss, parameters)
+# Positional rename of column 9.
+# NOTE(review): this depends on the column order produced by merge();
+# presumably column 9 is already 'mannwhit.pval' - confirm.
+colnames(signif.df)[9] <- 'mannwhit.pval'
+signif.df <- determineSig(signif.df)
+signif.df <- rankOrder(signif.df)
+
+# Volcano plots comparing the different parameters of analysis against the NPARC RSS Difference
+# Colored by significance test and whether the compound passes any.
+plot_volcanos(signif.df, save = FALSE)
+
+# Plot of RSS Differences vs. p-values for NPARC
+rss.pval.plot(signif.df, savePlot = TRUE)
+
+#Heatmap of compounds vs. different measurement styles.
+parameter_heatmaps(signif.df, plotHeat = TRUE)
+
+#Write out signif.df and full_df
+write.csv(x = full_df, file = paste(outdir,'full_df.csv',sep="/"))
+write.csv(x = signif.df, file = paste(outdir,'signif_df.csv',sep="/"))
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R
new file mode 100644
index 0000000..5a74700
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R
@@ -0,0 +1,113 @@
+# #  @@@@@@@   @@@@@@@              @@@@@@@  @@@@@@@@  @@@@@@@   @@@@@@    @@@@@@
+# #  @@@@@@@@  @@@@@@@             @@@@@@@@  @@@@@@@@  @@@@@@@  @@@@@@@   @@@@@@@@
+# #  @@!  @@@    @@!               !@@       @@!         @@!    !@@       @@!  @@@
+# #  !@!  @!@    !@!               !@!       !@!         !@!    !@!       !@!  @!@
+# #  @!@!!@!     @!!    @!@!@!@!@  !@!       @!!!:!      @!!    !!@@!!    @!@!@!@!
+# #  !!@!@!      !!!    !!!@!@!!!  !!!       !!!!!:      !!!     !!@!!!   !!!@!!!!
+# #  !!: :!!     !!:               :!!       !!:         !!:         !:!  !!:  !!!
+# #  :!:  !:!    :!:               :!:       :!:         :!:        !:!   :!:  !:!
+# #  ::   :::     ::                ::: :::   :: ::::     ::    :::: ::   ::   :::
+# # NonParametric Multiparameter Analysis of CETSA/RT-CETSA Experimental Sets
+# #
+# # Written by: Michael Ronzetti {NIH/NCATS/UMD}
+# # Patents: PCT/US21/45184, HHS E-022-2022-0-US-01
+# # Main Analysis
+
+suppressWarnings(library(logging))
+suppressWarnings(library(tidyverse))
+suppressWarnings(library(readxl))
+suppressWarnings(library(stringr))
+suppressWarnings(library(drc))
+suppressWarnings(library(ggthemes))
+suppressWarnings(library(cowplot))
+suppressWarnings(library(hrbrthemes))
+suppressWarnings(library(ggpubr))
+suppressWarnings(library(MESS))
+suppressWarnings(library(devtools))
+
+loginfo('loading moltenprot fit params from : %s', params)
+loginfo('loading moltenprot baseline corrected from : %s', values)
+loginfo('loading platemap fit params from : %s', plate_map)
+
+
+source('./prepare_params.R')
+
+# full_param <- read_csv(params,
+# show_col_types = FALSE
+# )
+
+source('./prepare_values.R')
+
+# curve_df <- read_csv(values,
+# show_col_types = FALSE
+# )
+
+platemap_filepath = plate_map
+
+# Assign compound ids and concentration from platemap
+plate_assignment <- function(df, platemap_file) {
+  # Attach compound ids ('sample' sheet) and concentrations ('conc' sheet) to df.
+  id_df <- read_excel(platemap_file, sheet = 'sample') %>%
+  # drop the first column (presumably the plate row labels - TODO confirm)
+  dplyr::select(-1) %>%
+  # flatten the remaining grid to long form: one (name, value) row per cell
+  pivot_longer(cols = 1:ncol(.))  %>%
+  # the cell values are the compound ids
+  rename(ncgc_id = value) %>%
+  # the former column headers are not needed
+  dplyr::select(-c('name'))
+  # NOTE 'empty' wells count as vehicle; the match is case-sensitive (not 'EMPTY')
+  id_df$ncgc_id <- gsub('empty', 'vehicle', id_df$ncgc_id)
+
+  # Concentrations: the same reshaping applied to the 'conc' sheet
+  conc_df <- read_excel(platemap_file, sheet = 'conc') %>%
+  # drop the first column
+  dplyr::select(-1) %>%
+  # flatten to long form, keep the values as `conc`, drop the headers
+  pivot_longer(., cols = 1:ncol(.)) %>%
+  rename(conc = value) %>%
+  dplyr::select(-c('name'))
+  # Prepend ids and concentrations to the dataset. cbind() pairs rows by
+  # position, so df is assumed to be in the same well order as the sheets.
+  df <- cbind(id_df, conc_df, df)
+  message('Plate assignment attached to dataframe.')
+
+  # coerce row/col to numeric (NOTE(review): level codes if they are factors)
+  df$row <- as.numeric(df$row)
+  df$col <- as.numeric(df$col)
+  return(df)
+}
+
+
+full_df <- full_param
+
+
+
+full_df <- plate_assignment(full_df, platemap_filepath)
+
+# Construct full data frame with curve fit and parameters for analysis
+bind_fulldf <- function(param_df, curve_df) {
+  # Column-bind by row position: parameter columns first, then curve columns.
+  cbind(param_df, curve_df)
+}
+
+# Concat dataframes.
+loginfo('concat dataframes')
+full_df <- bind_fulldf(full_df, curve_df)
+
+# Convert the MoltenProt Kelvin columns (Tm_fit, T_onset) to Celsius.
+kelToCel <- function(df) {
+  # Returned invisibly, so callers are expected to assign the result.
+  celsius_df <- mutate(df, Tm_fit = Tm_fit - 273.15, T_onset = T_onset - 273.15)
+  invisible(celsius_df)
+}
+
+# TODO move that before for each dataset
+# full_df <- full_df %>% dplyr::select(-c('...1')) %>% dplyr::select(-c('...1'))
+full_df <- kelToCel(full_df)
+
+merge_molten_filename <- "test_merged_molten_final.csv"
+output_path <- paste(outdir, merge_molten_filename, sep="/")
+loginfo('writing results to : %s', output_path)
+
+write.csv(full_df,output_path)
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
new file mode 100644
index 0000000..8f284cc
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
@@ -0,0 +1 @@
+"""Preprocess pandas dataframmes in python."""
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R
new file mode 100644
index 0000000..59247de
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R
@@ -0,0 +1,78 @@
+suppressWarnings(library(logging))
+library(tidyverse)
+
+# params = "/Users/antoinegerardin/RT-CETSA-Analysis/.data/final_outputs/moltenprot/plate_(1-59)_moltenprot_params.csv"
+
+loginfo('loading moltenprot params from : %s', params)
+
+# create a dataframe with two columns (row, col) for plate of (16,24)
+col_by_row <- expand.grid(row = sprintf('%.2d', 1:16), col = sprintf('%.2d', 1:24)) %>%
+# sort by row number
+arrange(., row)
+
+# NOTE this process creates spurious columns that should be removed
+exp_param <- read_csv(params,
+show_col_types = FALSE
+)
+
+# create a col with sequential ids for indexing
+# named it `well`
+exp_param <- exp_param %>% rownames_to_column() %>% rename('well' = 'rowname')
+
+# select columns we need for this analysis
+exp_param <- exp_param %>%
+        dplyr::select(
+          c(
+            'well',
+            'dHm_fit',
+            'Tm_fit',
+            'BS_factor',
+            'T_onset',
+            'dG_std'
+        ))
+
+# add row col info to results (after the well column)
+exp_param <- exp_param %>% bind_cols(col_by_row) %>% relocate(c('row', 'col'), .after = well)
+
+# NOTE why do we create well in the first place?
+# remove the well column
+exp_param <- exp_param %>% dplyr::select(-'well')
+
+# NOTE Basically regenerate the battleship coordinates based on the current ordering
+# Add well assignments for each plate
+well_assignment <- function(df, well_num) {
+  if (well_num == 96) {
+    # 96-well plate: rows A-H, columns 1-12.
+    letter <- LETTERS[1:8]
+    number <- c(1:12)
+  }
+  else if (well_num == 384) {
+    # 384-well plate: rows A-P, columns 1-24.
+    letter <- LETTERS[1:16]
+    number <- c(1:24)
+  }
+  else {
+    # No layout is known for other formats; fail with an explicit message.
+    stop('Unsupported plate format: ', well_num, ' (expected 96 or 384).')
+  }
+  # Prepend a `well` column of alphanumeric well labels (A01, A02, ...) to df.
+  #   df       : data frame assumed to hold one row per well, ordered row by
+  #              row across the plate (A01, A02, ..., then B01, ...)
+  #   well_num : plate format, 96 or 384
+  # Returns df with the `well` column bound in front of its columns.
+  #
+  # Column numbers are zero-padded to two digits (01, 02, ...).
+  number <- str_pad(number, 2, pad = '0')
+  # One label per well of the selected format, so temp_df has well_num rows;
+  # rep(each = ) lists every column of row A before moving on to row B.
+  temp_df <- tibble(well = paste0(rep(letter, each = length(number)), number))
+  message('Well assignments created for ', well_num, '-well plate.')
+  # cbind() pairs rows by position and needs matching row counts.
+  df <- cbind(temp_df, df)
+  return(df)
+}
+
+# exp_param <- well_assignment(exp_param, 384)
+full_param <- well_assignment(exp_param, 384)
+
+# write.csv(exp_param, "test_exp_param_full.csv")
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R
new file mode 100644
index 0000000..a12d3ae
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R
@@ -0,0 +1,64 @@
+
+# values = "/Users/antoinegerardin/RT-CETSA-Analysis/.data/final_outputs/moltenprot/plate_(1-59)_moltenprot_curves.csv"
+
+suppressWarnings(library(logging))
+library(tidyverse)
+
+loginfo('loading moltenprot values from : %s', values)
+
+# NOTE this process creates spurious columns that should be removed
+exp_curve_all <- read_csv(values,
+show_col_types = FALSE
+)
+
+# same, row, col grid.
+col_by_row <-
+  expand.grid(row = sprintf('%.2d', 1:16), col = sprintf('%.2d', 1:24)) %>%
+  arrange(., row)
+
+
+# rename to Temperature
+exp_curve_all <- exp_curve_all %>%
+# add prefix
+mutate(., Temperature = paste('val_t_', Temperature, sep = ''))
+
+exp_curve_all <- exp_curve_all %>%
+  # pivot transform columns into row combinations (vy creating a column name)
+  pivot_longer(cols = 2:ncol(exp_curve_all)) %>%
+  # pivoting again to get temperature as columns
+  pivot_wider(names_from = Temperature) %>%
+  # create a id column called well
+  rownames_to_column() %>% rename('well' = 'rowname') %>%
+  # add the grid coordinates
+  bind_cols(col_by_row) %>%
+  # remove all unused cols
+  dplyr::select(-c('name', 'well', 'row', 'col'))
+
+
+# Add temperature headers to df
+# TODO REVIEW this is sketchy
+add_tempheaders <- function(df,
+                            start_temp = 37,
+                            end_temp = 90) {
+  # Rename every column of df to 't_<temp>', where the temperatures are
+  # ncol(df) evenly spaced values from start_temp to end_temp (1 decimal).
+  # NOTE(review): existing column names are overwritten, not converted, so
+  # the labels are only right if the data really spans this range.
+  # length.out yields exactly one value per column, including the
+  # single-column case where a `by` step would divide by zero.
+  temperature_df <- round(
+    seq(start_temp, end_temp, length.out = ncol(df)), digits = 1)
+  colnames(df) <- paste('t_', temperature_df, sep = '')
+  message('Temperature assignments changed for ',
+          ncol(df),
+          ' points.')
+  return(df)
+}
+
+start_temp = 37
+end_temp = 90
+# exp_curve_all <- add_tempheaders(exp_curve_all, start_temp, end_temp)
+curve_df <- add_tempheaders(exp_curve_all, start_temp, end_temp)
+message('Fit curves retrieved.')
+
+# write.csv(exp_curve_all, "test_exp_curve_all.csv")
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
new file mode 100644
index 0000000..4166b0d
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
@@ -0,0 +1,42 @@
+"""Run R scripts."""
+
+import logging
+import os
+import subprocess
+from pathlib import Path
+
+POLUS_LOG = os.environ.get("POLUS_LOG", logging.INFO)
+
+logger = logging.getLogger("rt_cetsa_analysis")
+logger.setLevel(POLUS_LOG)
+
+
+def run_rscript(
+    params_filepath: Path,
+    values_filepath: Path,
+    platemap_filepath: Path,
+    out_dir: Path,
+):
+    """Run R script."""
+    print(
+        "run rscript with args: ",
+        params_filepath,
+        values_filepath,
+        platemap_filepath,
+        out_dir,
+    )
+
+    cmd = [
+        "Rscript",
+        "./main.R",
+        "--params",
+        params_filepath.as_posix(),
+        "--values",
+        values_filepath.as_posix(),
+        "--platemap",
+        platemap_filepath.as_posix(),
+        "--outdir",
+        out_dir.as_posix(),
+    ]
+
+    subprocess.run(args=cmd, cwd="src/polus/tabular/regression/rt_cetsa_analysis/")

From a64ec3ccff4fdabeed55dd5e31f5dd316b583634 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 05:13:50 -0400
Subject: [PATCH 07/18] feat: update for running tools in rt_cetsa dashboard.

---
 regression/rt-cetsa-analysis-tool/.gitignore  |  1 +
 regression/rt-cetsa-analysis-tool/Dockerfile  | 13 +++-----
 .../rt-cetsa-analysis-tool/pyproject.toml     |  2 +-
 .../rt_cetsa_analysis.cwl                     |  4 +++
 .../regression/rt_cetsa_analysis/__main__.py  |  4 ++-
 .../regression/rt_cetsa_analysis/main.R       | 22 +++++++++++--
 .../rt_cetsa_analysis/main_analysis.R         |  8 +----
 .../rt_cetsa_analysis/prepare_data.R          | 14 --------
 .../prepare_moltenprot_outputs.py             | 32 +++++++++++++++++++
 .../rt_cetsa_analysis/prepare_params.R        |  8 +----
 .../rt_cetsa_analysis/prepare_values.R        | 19 ++++-------
 .../rt_cetsa_analysis/run_rscript.py          | 29 +++++++++++++----
 .../rt_cetsa_analysis/simple_main.R           | 30 +++++++++++++++++
 .../regression/rt_cetsa_analysis/test.R       |  0
 .../rt-cetsa-analysis-tool/tests/__init__.py  |  1 +
 .../rt-cetsa-analysis-tool/tests/conftest.py  | 13 ++++++++
 .../tests/test_run_rscript.py                 | 20 ++++++++++++
 17 files changed, 159 insertions(+), 61 deletions(-)
 create mode 100644 regression/rt-cetsa-analysis-tool/.gitignore
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R
 create mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/test.R
 create mode 100644 regression/rt-cetsa-analysis-tool/tests/__init__.py
 create mode 100644 regression/rt-cetsa-analysis-tool/tests/conftest.py
 create mode 100644 regression/rt-cetsa-analysis-tool/tests/test_run_rscript.py

diff --git a/regression/rt-cetsa-analysis-tool/.gitignore b/regression/rt-cetsa-analysis-tool/.gitignore
new file mode 100644
index 0000000..c32211a
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/.gitignore
@@ -0,0 +1 @@
+tests/out
diff --git a/regression/rt-cetsa-analysis-tool/Dockerfile b/regression/rt-cetsa-analysis-tool/Dockerfile
index b41b225..6311372 100644
--- a/regression/rt-cetsa-analysis-tool/Dockerfile
+++ b/regression/rt-cetsa-analysis-tool/Dockerfile
@@ -44,17 +44,12 @@ COPY pyproject.toml ${EXEC_DIR}
 COPY README.md ${EXEC_DIR}
 RUN apt-get update && apt-get install -y python3
 RUN apt-get -y install python3-pip python3-venv
-
 COPY src/ ${EXEC_DIR}/src
-
-RUN python3 -m venv ${EXEC_DIR}/.venv
-# RUN chmod u+x ${EXEC_DIR}/.venv/bin/activate
-# RUN ${EXEC_DIR}/.venv/bin/activate
-RUN ${EXEC_DIR}/.venv/bin/pip3 install ${EXEC_DIR} --no-cache-dir
-
+RUN pip3 install ${EXEC_DIR} --no-cache-dir  --break-system-packages
 
 WORKDIR ${EXEC_DIR}
 
 # CMD ["Rscript", "main.R"]
-ENTRYPOINT ["/opt/executables/.venv/bin/python3", "-m", "polus.tabular.regression.rt_cetsa_analysis"]
-CMD ["--help"]
+CMD ["python3", "-m", "polus.tabular.regression.rt_cetsa_analysis"]
+# CMD ["--help"]
+# CMD ["bash"]
diff --git a/regression/rt-cetsa-analysis-tool/pyproject.toml b/regression/rt-cetsa-analysis-tool/pyproject.toml
index 92595cf..323a14f 100644
--- a/regression/rt-cetsa-analysis-tool/pyproject.toml
+++ b/regression/rt-cetsa-analysis-tool/pyproject.toml
@@ -13,7 +13,7 @@ packages = [{include = "polus", from = "src"}]
 [tool.poetry.dependencies]
 python = ">=3.9,<3.12"
 # filepattern = "2.0.4"
-typer = "^0.12.3"
+typer = "^0.7"
 
 [tool.poetry.group.dev.dependencies]
 bump2version = "^1.0.1"
diff --git a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
index e49f32b..149b3a3 100644
--- a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
+++ b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
@@ -1,5 +1,6 @@
 class: CommandLineTool
 cwlVersion: v1.2
+baseCommand: ["python3", "-m", "polus.tabular.regression.rt_cetsa_analysis"]
 inputs:
   inpDir:
     inputBinding:
@@ -31,6 +32,9 @@ outputs:
       glob: $(inputs.outDir.basename)
     type: Directory
 requirements:
+  EnvVarRequirement:
+    envDef:
+      WORKDIR: /opt/executables/
   DockerRequirement:
     dockerPull: polusai/rt-cetsa-analysis-simple-tool:0.1.0
   InitialWorkDirRequirement:
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
index 30cd581..4264c23 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
@@ -68,7 +68,7 @@ def main(
 ) -> None:
     """CLI for rt-cetsa-moltprot-tool."""
     # TODO: Add to docs that input csv file should be sorted by `Temperature` column.
-    logger.info("Starting the CLI for rt-cetsa-moltprot-tool.")
+    logger.info("Starting the CLI for rt-cetsa-moltenprot-tool.")
 
     logger.info(f"Input directory: {inp_dir}")
     logger.info(f"params_pattern: {params_pattern}")
@@ -79,6 +79,8 @@ def main(
     params = inp_dir / params_pattern
     values = inp_dir / values_pattern
 
+    logger.info(f"{inp_dir}")
+
     if preview:
         NotImplemented  # noqa:  B018
 
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R
index 118d0cf..f8742cc 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main.R
@@ -7,21 +7,24 @@ basicConfig()
 
 # Setup the argument parsing
 addFitParams <- function(parser) {
-  print("addFitParams")
   parser$add_argument("--params", type = "character",help="Fit params csv file")
   invisible(NULL)
+  print("params arg ok ")
 }
 addBaselineCorrected <- function(parser) {
   parser$add_argument("--values", type = "character",help="Baseline corrected csv file")
   invisible(NULL)
+  print("values arg ok ")
 }
 addPlateMap <- function(parser) {
   parser$add_argument("--platemap", type = "character",help="platemap excel file")
   invisible(NULL)
+  print("platemap arg ok ")
 }
 addOutputArgs <- function(parser) {
-  parser$add_argument("--outdir", type = "character",help="Output csv file")
+  parser$add_argument("--outdir", type = "character",help="Output directory")
   invisible(NULL)
+  print("outdir arg ok ")
 }
 getAllParser <- function() {
   parser <- ArgumentParser(description="ALL PARSER")
@@ -33,8 +36,21 @@ getAllParser <- function() {
 }
 
 # Parse the arguments
+loginfo('collect all parsing routines...')
 parser <- getAllParser()
-args <- parser$parse_args()
+loginfo('parse data...')
+
+print(parser)
+
+tryCatch(
+  args <- parser$parse_args(),
+  error = function(e){
+    logerror('argument parsing failed: %s', conditionMessage(e))
+    quit(status = 1)  # args is unassigned on failure; do not continue
+  }
+)
+
+
 
 
 #Path to csvfile directory
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R
index 53ef628..c15c59a 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/main_analysis.R
@@ -27,22 +27,17 @@ library(devtools)
 # BECAUSE OF BUG
 pdf(file = NULL)
 
-
 # EXPERIMENTAL PARAMETERS AND SETUP
 #
 # Input experiment parameters here
-
 startTemp <- 37
 endTemp <- 95
 plate_format <- 384
 control <- 'vehicle'
 pc <- 'control'
 
-# read in preprocessed data
-merge_molten_filename <- "test_merged_molten_final.csv"
-output_path <- paste(outdir, merge_molten_filename, sep="/")
-loginfo('reading preprocessed data from : %s', output_path)
 source('./functions.R')
+
 full_df <- calculate_auc(full_df)
 
 # Perform some preliminary control group analysis of variability
@@ -58,7 +53,6 @@ controlPlot <-
     output = 'plot',
     controlDF = control_var
   )
-print(controlPlot)
 
 #Calculate melting parameter difference for each well from MoltenProt
 # full_df <- calculate_meltingparams(full_df) %>%
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R
index 5a74700..f08e82e 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_data.R
@@ -32,16 +32,8 @@ loginfo('loading platemap fit params from : %s', plate_map)
 
 source('./prepare_params.R')
 
-# full_param <- read_csv(params,
-# show_col_types = FALSE
-# )
-
 source('./prepare_values.R')
 
-# curve_df <- read_csv(values,
-# show_col_types = FALSE
-# )
-
 platemap_filepath = plate_map
 
 # Assign compound ids and concentration from platemap
@@ -105,9 +97,3 @@ kelToCel <- function(df) {
 # TODO move that before for each dataset
 # full_df <- full_df %>% dplyr::select(-c('...1')) %>% dplyr::select(-c('...1'))
 full_df <- kelToCel(full_df)
-
-merge_molten_filename <- "test_merged_molten_final.csv"
-output_path <- paste(outdir, merge_molten_filename, sep="/")
-loginfo('writing results to : %s', output_path)
-
-write.csv(full_df,output_path)
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
index 8f284cc..dad0616 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
@@ -1 +1,33 @@
 """Preprocess pandas dataframmes in python."""
+
+from pathlib import Path
+
+import pandas as pd
+from scipy.constants import convert_temperature
+
+pd.options.display.precision = 1
+
+
+inp_dir = Path(
+    "/Users/antoinegerardin/RT-CETSA-Analysis/.data/final_outputs/moltenprot/",
+)
+params = "plate_(1-59)_moltenprot_params.csv"
+values = "plate_(1-59)_moltenprot_curves.csv"
+
+df = pd.read_csv(inp_dir / values)
+df["Temperature"] = (
+    df["Temperature"]
+    .map(lambda temp: convert_temperature(temp, "Kelvin", "Celsius"))
+    .round(1)
+    .astype(str)
+)
+df["Temperature"] = "t_" + df["Temperature"]
+
+print(list(df.columns))
+print(df.index)
+df = df.transpose()
+print(df.head)
+
+print(list(df.columns))
+
+df.to_csv("transformed_curves.csv")
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R
index 59247de..c21372d 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_params.R
@@ -1,8 +1,6 @@
 suppressWarnings(library(logging))
 library(tidyverse)
 
-# params = "/Users/antoinegerardin/RT-CETSA-Analysis/.data/final_outputs/moltenprot/plate_(1-59)_moltenprot_params.csv"
-
 loginfo('loading moltenprot params from : %s', params)
 
 # create a dataframe with two columns (row, col) for plate of (16,24)
@@ -10,7 +8,6 @@ col_by_row <- expand.grid(row = sprintf('%.2d', 1:16), col = sprintf('%.2d', 1:2
 # sort by row number
 arrange(., row)
 
-# NOTE this process creates spurious columns that should be removed
 exp_param <- read_csv(params,
 show_col_types = FALSE
 )
@@ -38,7 +35,7 @@ exp_param <- exp_param %>% bind_cols(col_by_row) %>% relocate(c('row', 'col'), .
 # remove the well column
 exp_param <- exp_param %>% dplyr::select(-'well')
 
-# NOTE Basically regenerate the battleship coordinates based on the current ordering
+# NOTE Basically regenerate the alphanumerical coordinates based on the current ordering
 # Add well assignments for each plate
 well_assignment <- function(df, well_num) {
   if (well_num == 96) {
@@ -72,7 +69,4 @@ well_assignment <- function(df, well_num) {
   return(df)
 }
 
-# exp_param <- well_assignment(exp_param, 384)
 full_param <- well_assignment(exp_param, 384)
-
-# write.csv(exp_param, "test_exp_param_full.csv")
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R
index a12d3ae..866423d 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_values.R
@@ -1,12 +1,9 @@
-
-# values = "/Users/antoinegerardin/RT-CETSA-Analysis/.data/final_outputs/moltenprot/plate_(1-59)_moltenprot_curves.csv"
-
 suppressWarnings(library(logging))
 library(tidyverse)
 
 loginfo('loading moltenprot values from : %s', values)
 
-# NOTE this process creates spurious columns that should be removed
+
 exp_curve_all <- read_csv(values,
 show_col_types = FALSE
 )
@@ -17,13 +14,12 @@ col_by_row <-
   arrange(., row)
 
 
-# rename to Temperature
 exp_curve_all <- exp_curve_all %>%
 # add prefix
 mutate(., Temperature = paste('val_t_', Temperature, sep = ''))
 
 exp_curve_all <- exp_curve_all %>%
-  # pivot transform columns into row combinations (vy creating a column name)
+  # pivot transform columns into row combinations
   pivot_longer(cols = 2:ncol(exp_curve_all)) %>%
   # pivoting again to get temperature as columns
   pivot_wider(names_from = Temperature) %>%
@@ -34,9 +30,6 @@ exp_curve_all <- exp_curve_all %>%
   # remove all unused cols
   dplyr::select(-c('name', 'well', 'row', 'col'))
 
-
-# Add temperature headers to df
-# TODO REVIEW this is sketchy
 add_tempheaders <- function(df,
                             start_temp = 37,
                             end_temp = 90) {
@@ -55,10 +48,10 @@ add_tempheaders <- function(df,
   return(df)
 }
 
+# NOTE: HARDCODED VALUE INTERVAL
+# Also, this should come from moltenprot
 start_temp = 37
 end_temp = 90
-# exp_curve_all <- add_tempheaders(exp_curve_all, start_temp, end_temp)
-curve_df <- add_tempheaders(exp_curve_all, start_temp, end_temp)
-message('Fit curves retrieved.')
 
-# write.csv(exp_curve_all, "test_exp_curve_all.csv")
+curve_df <- add_tempheaders(exp_curve_all, start_temp, end_temp)
+message('Done preparing fit curves.')
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
index 4166b0d..5ce5bd8 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 
 POLUS_LOG = os.environ.get("POLUS_LOG", logging.INFO)
+WORKDIR = os.environ.get("WORKDIR", "")
 
 logger = logging.getLogger("rt_cetsa_analysis")
 logger.setLevel(POLUS_LOG)
@@ -26,17 +27,33 @@ def run_rscript(
         out_dir,
     )
 
+    cwd = Path(__file__).parent
+    if WORKDIR:
+        cwd = (
+            Path(WORKDIR)
+            / "src"
+            / "polus"
+            / "tabular"
+            / "regression"
+            / "rt_cetsa_analysis/"
+        )
+
+    logger.info(f"################## current working directory : {cwd.as_posix()}")
+
+    #     "Rscript",
+    #     "./main.R",
+    #     "--params",
+    #     "--values",
+    #     "--platemap",
+    #     "--outdir",
+
     cmd = [
         "Rscript",
-        "./main.R",
-        "--params",
+        "./simple_main.R",
         params_filepath.as_posix(),
-        "--values",
         values_filepath.as_posix(),
-        "--platemap",
         platemap_filepath.as_posix(),
-        "--outdir",
         out_dir.as_posix(),
     ]
 
-    subprocess.run(args=cmd, cwd="src/polus/tabular/regression/rt_cetsa_analysis/")
+    subprocess.run(args=cmd, cwd=cwd)
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R
new file mode 100644
index 0000000..b30eb7b
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R
@@ -0,0 +1,30 @@
+
+suppressWarnings(library(logging))
+
+
+# Initialize the logger
+basicConfig()
+
+args = commandArgs(trailingOnly=TRUE)
+print(args[1])
+print(args[2])
+print(args[3])
+print(args[4])
+
+params <- args[1]
+values <- args[2]
+plate_map <- args[3]
+outdir <- args[4]
+
+loginfo('params = %s', params)
+loginfo('values = %s', values)
+loginfo('platemap = %s', plate_map)
+loginfo('outdir = %s', outdir)
+
+loginfo('params (fit params) = %s', params)
+loginfo('values (baseline corrected): %s', values)
+loginfo('platemap file (plate metadata): %s', plate_map)
+loginfo('outdir (output directory): %s', outdir)
+
+source('./prepare_data.R')
+source('./main_analysis.R')
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/test.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/test.R
new file mode 100644
index 0000000..e69de29
diff --git a/regression/rt-cetsa-analysis-tool/tests/__init__.py b/regression/rt-cetsa-analysis-tool/tests/__init__.py
new file mode 100644
index 0000000..d420712
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests."""
diff --git a/regression/rt-cetsa-analysis-tool/tests/conftest.py b/regression/rt-cetsa-analysis-tool/tests/conftest.py
new file mode 100644
index 0000000..4983315
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/tests/conftest.py
@@ -0,0 +1,13 @@
+"""Set up."""
+import pytest
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    """Register the ``--slow`` flag that opts in to long-running tests."""
+    slow_flag = {
+        "action": "store_true",
+        "dest": "slow",
+        "default": False,
+        "help": "run slow tests",
+    }
+    parser.addoption("--slow", **slow_flag)
diff --git a/regression/rt-cetsa-analysis-tool/tests/test_run_rscript.py b/regression/rt-cetsa-analysis-tool/tests/test_run_rscript.py
new file mode 100644
index 0000000..fb8cb0b
--- /dev/null
+++ b/regression/rt-cetsa-analysis-tool/tests/test_run_rscript.py
@@ -0,0 +1,20 @@
+"""Tests."""
+from pathlib import Path
+
+import pytest
+from polus.tabular.regression.rt_cetsa_analysis.run_rscript import run_rscript
+
+
+@pytest.mark.skipif("not config.getoption('slow')")
+def test_run_rscript():
+    """Run the R analysis on the sample plate in tests/data (--slow only)."""
+    data_dir = Path.cwd() / "tests" / "data"
+    out_dir = Path.cwd() / "tests" / "out"
+    # tests/out is git-ignored, so it is missing on a fresh checkout.
+    out_dir.mkdir(parents=True, exist_ok=True)
+    run_rscript(
+        data_dir / "plate_(1-59)_moltenprot_params.csv",
+        data_dir / "plate_(1-59)_moltenprot_curves.csv",
+        data_dir / "platemap.xlsx",
+        out_dir,
+    )

From 1a34c2ab9b2a29d4e8924e052c8c4f097d3979a7 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 05:50:51 -0400
Subject: [PATCH 08/18] feat: update analysis tool before PR.

---
 regression/rt-cetsa-analysis-tool/ict.yml     |  8 +--
 regression/rt-cetsa-analysis-tool/plugin.json | 17 ++++--
 .../regression/rt_cetsa_analysis/__main__.py  | 54 ++++++++++++-------
 .../rt_cetsa_analysis/__main_filepattern__.py | 52 ------------------
 .../prepare_moltenprot_outputs.py             | 33 ------------
 .../rt_cetsa_analysis/run_rscript.py          | 18 +------
 .../rt_cetsa_analysis/simple_main.R           |  4 --
 .../regression/rt_cetsa_analysis/test.R       |  0
 8 files changed, 55 insertions(+), 131 deletions(-)
 delete mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py
 delete mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
 delete mode 100644 regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/test.R

diff --git a/regression/rt-cetsa-analysis-tool/ict.yml b/regression/rt-cetsa-analysis-tool/ict.yml
index b001e9f..353fc84 100644
--- a/regression/rt-cetsa-analysis-tool/ict.yml
+++ b/regression/rt-cetsa-analysis-tool/ict.yml
@@ -12,13 +12,13 @@ inputs:
   - params
   name: params
   required: true
-  type: path
+  type: string
 - description: Baseline corrected csv files
   format:
   - values
   name: values
   required: true
-  type: path
+  type: string
 - description: platemap excel file
   format:
   - platemap
@@ -46,11 +46,11 @@ ui:
 - description: Fit params csv file
   key: inputs.params
   title: Fit params csv file
-  type: path
+  type: string
 - description: baseline corrected csv file
   key: inputs.values
   title: baseline corrected csv file
-  type: path
+  type: string
 - description: platemap excel file
   key: inputs.platemap
   title: platemap excel file
diff --git a/regression/rt-cetsa-analysis-tool/plugin.json b/regression/rt-cetsa-analysis-tool/plugin.json
index d59519a..f16fd81 100644
--- a/regression/rt-cetsa-analysis-tool/plugin.json
+++ b/regression/rt-cetsa-analysis-tool/plugin.json
@@ -48,10 +48,19 @@
       "description": "Input data collection to be processed by this plugin"
     },
     {
-      "key": "inputs.filePattern",
-      "title": "File Pattern",
-      "description": "File Pattern to parse input files",
-      "default": ".+"
+      "key": "inputs.params",
+      "title": "Params",
+      "description": "Params"
+    },
+    {
+      "key": "inputs.values",
+      "title": "Values",
+      "description": "Values"
+    },
+    {
+      "key": "inputs.platemap",
+      "title": "Plate Map",
+      "description": "Plate Map"
     }
   ]
 }
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
index 4264c23..e4af4bb 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
@@ -1,5 +1,6 @@
-"""CLI for rt-cetsa-moltprot-tool."""
+"""CLI for rt-cetsa-analysis tool."""
 
+import json
 import logging
 import os
 from pathlib import Path
@@ -33,15 +34,15 @@ def main(
         readable=True,
         resolve_path=True,
     ),
-    params_pattern: str = typer.Option(
+    params_filename: str = typer.Option(
         ...,
         "--params",
-        help="name of the molten fit params csv file in the input directory.",
+        help="name of the moltenprot fit params csv file in the input directory.",
     ),
-    values_pattern: str = typer.Option(
+    values_filename: str = typer.Option(
         ...,
         "--values",
-        help="name of the baseline corrected values csv file in the input directory.",
+        help="name of the moltenprot baseline corrected values csv file in the input directory.",
     ),
     platemap: Path = typer.Option(
         ...,
@@ -66,30 +67,47 @@ def main(
         resolve_path=True,
     ),
 ) -> None:
-    """CLI for rt-cetsa-moltprot-tool."""
-    # TODO: Add to docs that input csv file should be sorted by `Temperature` column.
-    logger.info("Starting the CLI for rt-cetsa-moltenprot-tool.")
+    """CLI for rt-cetsa-analysis tool."""
+    logger.info("Starting the CLI for rt-cetsa-analysis tool.")
 
     logger.info(f"Input directory: {inp_dir}")
-    logger.info(f"params_pattern: {params_pattern}")
-    logger.info(f"values_pattern: {values_pattern}")
+    logger.info(f"params: {params_filename}")
+    logger.info(f"values: {values_filename}")
     logger.info(f"platemap path: {platemap}")
     logger.info(f"Output directory: {out_dir}")
 
-    params = inp_dir / params_pattern
-    values = inp_dir / values_pattern
+    if params_filename:
+        params = inp_dir / params_filename
+    elif (inp_dir / "params.csv").exists():
+        params = inp_dir / "params.csv"
+    else:
+        raise ValueError(
+            f"No 'params.csv' moltenprot parameters file found in {inp_dir}.",
+        )
 
-    logger.info(f"{inp_dir}")
-
-    if preview:
-        NotImplemented  # noqa:  B018
+    if values_filename:
+        values = inp_dir / values_filename
+    elif (inp_dir / "values.csv").exists():
+        values = inp_dir / "values.csv"
+    else:
+        raise ValueError(f"No 'values.csv' moltenprot values file found in {inp_dir}.")
 
     if not params.exists():
         raise FileNotFoundError(f"params file not found : {params}")
     if not values.exists():
         raise FileNotFoundError(f"values file not found : {values}")
-    if not platemap.exists():
-        raise FileNotFoundError(f"platemap file not found : {platemap}")
+
+    logger.info(f"params filename: {params}")
+    logger.info(f"values filename: {values}")
+    logger.info(f"platemap path: {platemap}")
+    logger.info(f"Output directory: {out_dir}")
+
+    if preview:
+        outputs: list[str] = ["signif_df.csv"]
+        out_json = {"files": outputs}
+        with (out_dir / "preview.json").open("w") as f:
+            json.dump(out_json, f, indent=2)
+        return
 
     run_rscript(params, values, platemap, out_dir)
 
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py
deleted file mode 100644
index d150462..0000000
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main_filepattern__.py
+++ /dev/null
@@ -1,52 +0,0 @@
-"""Main with filepattern support."""
-# """CLI for rt-cetsa-moltprot-tool."""
-
-
-# # get env
-
-# # Initialize the logger
-# logging.basicConfig(
-
-
-# @app.command()
-# def main(
-#     inp_dir: pathlib.Path = typer.Option(
-#         ...,
-#         "--inpDir",
-#     ),
-#     params_pattern: str = typer.Option(
-#         ".+",
-#         "--params",
-#     ),
-#     values_pattern: str = typer.Option(
-#         ".+",
-#         "--values",
-#     ),
-#     platemap_pattern: str = typer.Option(
-#         ".+",
-#         "--platemap",
-#     ),
-#     preview: bool = typer.Option(
-#         False,
-#         "--preview",
-#     ),
-#     out_dir: pathlib.Path = typer.Option(
-#         ...,
-#         "--outDir",
-#     ),
-# ) -> None:
-#     """CLI for rt-cetsa-moltprot-tool."""
-#     # TODO: Add to docs that input csv file should be sorted by `Temperature` column.
-
-
-#     if preview:
-#         with (out_dir / "preview.json").open("w") as f:
-
-#     for params, values, platemap in zip(params_files, values_files, platemap_files):
-#         # TODO replace with exceptions
-#         if len(params[1]) != 1 or len(values[1]) != 1 or len(platemap[1]) != 1:
-#             raise Exception(
-#                 msg,
-
-
-# if __name__ == "__main__":
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
deleted file mode 100644
index dad0616..0000000
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/prepare_moltenprot_outputs.py
+++ /dev/null
@@ -1,33 +0,0 @@
-"""Preprocess pandas dataframmes in python."""
-
-from pathlib import Path
-
-import pandas as pd
-from scipy.constants import convert_temperature
-
-pd.options.display.precision = 1
-
-
-inp_dir = Path(
-    "/Users/antoinegerardin/RT-CETSA-Analysis/.data/final_outputs/moltenprot/",
-)
-params = "plate_(1-59)_moltenprot_params.csv"
-values = "plate_(1-59)_moltenprot_curves.csv"
-
-df = pd.read_csv(inp_dir / values)
-df["Temperature"] = (
-    df["Temperature"]
-    .map(lambda temp: convert_temperature(temp, "Kelvin", "Celsius"))
-    .round(1)
-    .astype(str)
-)
-df["Temperature"] = "t_" + df["Temperature"]
-
-print(list(df.columns))
-print(df.index)
-df = df.transpose()
-print(df.head)
-
-print(list(df.columns))
-
-df.to_csv("transformed_curves.csv")
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
index 5ce5bd8..c7ca0c4 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/run_rscript.py
@@ -19,15 +19,8 @@ def run_rscript(
     out_dir: Path,
 ):
     """Run R script."""
-    print(
-        "run rscript with args: ",
-        params_filepath,
-        values_filepath,
-        platemap_filepath,
-        out_dir,
-    )
-
     cwd = Path(__file__).parent
+
     if WORKDIR:
         cwd = (
             Path(WORKDIR)
@@ -38,14 +31,7 @@ def run_rscript(
             / "rt_cetsa_analysis/"
         )
 
-    logger.info(f"################## current working directory : {cwd.as_posix()}")
-
-    #     "Rscript",
-    #     "./main.R",
-    #     "--params",
-    #     "--values",
-    #     "--platemap",
-    #     "--outdir",
+    logger.info(f"current working directory : {cwd.as_posix()}")
 
     cmd = [
         "Rscript",
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R
index b30eb7b..239e87c 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/simple_main.R
@@ -6,10 +6,6 @@ suppressWarnings(library(logging))
 basicConfig()
 
 args = commandArgs(trailingOnly=TRUE)
-print(args[1])
-print(args[2])
-print(args[3])
-print(args[4])
 
 params <- args[1]
 values <- args[2]
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/test.R b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/test.R
deleted file mode 100644
index e69de29..0000000

From 1b9a4c613e996665d67de13853598395cf1dfbaf Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 05:53:00 -0400
Subject: [PATCH 09/18] chore: refactor moltenprot (rename tool and package from moltprot)

---
 .../.bumpversion.cfg                                          | 0
 regression/rt-cetsa-moltenprot-tool/.gitignore                | 0
 .../Dockerfile                                                | 1 +
 .../README.md                                                 | 0
 .../VERSION                                                   | 0
 .../build-docker.sh                                           | 0
 .../ict.yml                                                   | 2 +-
 .../plugin.json                                               | 2 +-
 .../pyproject.toml                                            | 2 +-
 .../rt_cetsa_moltenprot.cwl                                   | 0
 .../run-plugin.sh                                             | 2 +-
 .../regression/rt_cetsa_moltenprot/MOLTENPROT_VERSION}        | 0
 .../polus/tabular/regression/rt_cetsa_moltenprot}/__init__.py | 0
 .../polus/tabular/regression/rt_cetsa_moltenprot}/__main__.py | 4 ++--
 .../src/polus/tabular/regression/rt_cetsa_moltenprot}/core.py | 2 +-
 .../polus/tabular/regression/rt_cetsa_moltenprot}/models.py   | 0
 16 files changed, 8 insertions(+), 7 deletions(-)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/.bumpversion.cfg (100%)
 create mode 100644 regression/rt-cetsa-moltenprot-tool/.gitignore
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/Dockerfile (96%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/README.md (100%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/VERSION (100%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/build-docker.sh (100%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/ict.yml (95%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/plugin.json (95%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/pyproject.toml (98%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/rt_cetsa_moltenprot.cwl (100%)
 rename regression/{rt-cetsa-moltprot-tool => rt-cetsa-moltenprot-tool}/run-plugin.sh (87%)
 rename regression/{rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/RT_CETSA_VERSION => rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/MOLTENPROT_VERSION} (100%)
 rename regression/{rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot => rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot}/__init__.py (100%)
 rename regression/{rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot => rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot}/__main__.py (95%)
 rename regression/{rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot => rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot}/core.py (99%)
 rename regression/{rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot => rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot}/models.py (100%)

diff --git a/regression/rt-cetsa-moltprot-tool/.bumpversion.cfg b/regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/.bumpversion.cfg
rename to regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg
diff --git a/regression/rt-cetsa-moltenprot-tool/.gitignore b/regression/rt-cetsa-moltenprot-tool/.gitignore
new file mode 100644
index 0000000..e69de29
diff --git a/regression/rt-cetsa-moltprot-tool/Dockerfile b/regression/rt-cetsa-moltenprot-tool/Dockerfile
similarity index 96%
rename from regression/rt-cetsa-moltprot-tool/Dockerfile
rename to regression/rt-cetsa-moltenprot-tool/Dockerfile
index db91d4c..4e6d5bc 100755
--- a/regression/rt-cetsa-moltprot-tool/Dockerfile
+++ b/regression/rt-cetsa-moltenprot-tool/Dockerfile
@@ -15,6 +15,7 @@ COPY README.md ${EXEC_DIR}
 COPY src ${EXEC_DIR}/src
 
 RUN pip3 install ${EXEC_DIR} --no-cache-dir
+RUN pip3 install .
 
 ENTRYPOINT ["python3", "-m", "polus.tabular.regression.rt_cetsa_moltenprot"]
 CMD ["--help"]
diff --git a/regression/rt-cetsa-moltprot-tool/README.md b/regression/rt-cetsa-moltenprot-tool/README.md
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/README.md
rename to regression/rt-cetsa-moltenprot-tool/README.md
diff --git a/regression/rt-cetsa-moltprot-tool/VERSION b/regression/rt-cetsa-moltenprot-tool/VERSION
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/VERSION
rename to regression/rt-cetsa-moltenprot-tool/VERSION
diff --git a/regression/rt-cetsa-moltprot-tool/build-docker.sh b/regression/rt-cetsa-moltenprot-tool/build-docker.sh
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/build-docker.sh
rename to regression/rt-cetsa-moltenprot-tool/build-docker.sh
diff --git a/regression/rt-cetsa-moltprot-tool/ict.yml b/regression/rt-cetsa-moltenprot-tool/ict.yml
similarity index 95%
rename from regression/rt-cetsa-moltprot-tool/ict.yml
rename to regression/rt-cetsa-moltenprot-tool/ict.yml
index da600de..5b987c2 100644
--- a/regression/rt-cetsa-moltprot-tool/ict.yml
+++ b/regression/rt-cetsa-moltenprot-tool/ict.yml
@@ -3,7 +3,7 @@ author:
 - Antoine Gerardin
 - Najib Ishaq
 contact: nick.schaub@nih.gov
-container: polusai/rt-cetsa-moltprot-tool:0.1.0
+container: polusai/rt-cetsa-moltproten-tool:0.1.0
 description: Run moltenprot regression for the RT-CETSA pipeline.
 entrypoint: python3 -m polus.tabular.regression.rt_cetsa_moltprot
 inputs:
diff --git a/regression/rt-cetsa-moltprot-tool/plugin.json b/regression/rt-cetsa-moltenprot-tool/plugin.json
similarity index 95%
rename from regression/rt-cetsa-moltprot-tool/plugin.json
rename to regression/rt-cetsa-moltenprot-tool/plugin.json
index 63877b2..8b44a5a 100644
--- a/regression/rt-cetsa-moltprot-tool/plugin.json
+++ b/regression/rt-cetsa-moltenprot-tool/plugin.json
@@ -8,7 +8,7 @@
   "repository": "https://github.com/PolusAI/tabular-tools",
   "website": "https://ncats.nih.gov/preclinical/core/informatics",
   "citation": "",
-  "containerId": "polusai/rt-cetsa-moltprot-tool:0.1.0",
+  "containerId": "polusai/rt-cetsa-moltenproten-tool:0.1.0",
   "baseCommand": [
     "python3",
     "-m",
diff --git a/regression/rt-cetsa-moltprot-tool/pyproject.toml b/regression/rt-cetsa-moltenprot-tool/pyproject.toml
similarity index 98%
rename from regression/rt-cetsa-moltprot-tool/pyproject.toml
rename to regression/rt-cetsa-moltenprot-tool/pyproject.toml
index aa90b1f..51f1640 100644
--- a/regression/rt-cetsa-moltprot-tool/pyproject.toml
+++ b/regression/rt-cetsa-moltenprot-tool/pyproject.toml
@@ -1,5 +1,5 @@
 [tool.poetry]
-name = "polus_tabular_regression_rt_cetsa_moltprot"
+name = "polus_tabular_regression_rt_cetsa_moltenprot"
 version = "0.1.0"
 description = "Run moltenprot for the RT-CETSA pipeline."
 authors = [
diff --git a/regression/rt-cetsa-moltprot-tool/rt_cetsa_moltenprot.cwl b/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/rt_cetsa_moltenprot.cwl
rename to regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
diff --git a/regression/rt-cetsa-moltprot-tool/run-plugin.sh b/regression/rt-cetsa-moltenprot-tool/run-plugin.sh
similarity index 87%
rename from regression/rt-cetsa-moltprot-tool/run-plugin.sh
rename to regression/rt-cetsa-moltenprot-tool/run-plugin.sh
index 73c9245..948a180 100755
--- a/regression/rt-cetsa-moltprot-tool/run-plugin.sh
+++ b/regression/rt-cetsa-moltenprot-tool/run-plugin.sh
@@ -14,7 +14,7 @@ LOGLEVEL=INFO
 
 docker run --mount type=bind,source=${datapath},target=/data/  \
             --env POLUS_LOG=${LOGLEVEL} \
-            polusai/rt-cetsa-moltenprot-tool:${version} \
+            polusai/rt-cetsa-moltenproten-tool:${version} \
             --inpDir ${inpDir} \
             --filePattern ${pattern} \
             --outDir ${outDir}
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/RT_CETSA_VERSION b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/MOLTENPROT_VERSION
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/RT_CETSA_VERSION
rename to regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/MOLTENPROT_VERSION
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__init__.py
rename to regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
similarity index 95%
rename from regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
rename to regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
index 569bf40..cc867ae 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/__main__.py
+++ b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
@@ -7,7 +7,7 @@
 
 import filepattern
 import typer
-from polus.tabular.regression.rt_cetsa_moltprot import fit_data
+from polus.tabular.regression.rt_cetsa_moltenprot import fit_data
 
 # Initialize the logger
 logging.basicConfig(
@@ -54,7 +54,6 @@ def main(
     ),
 ) -> None:
     """CLI for rt-cetsa-moltprot-tool."""
-    # TODO: Add to docs that input csv file should be sorted by `Temperature` column.
     logger.info("Starting the CLI for rt-cetsa-moltprot-tool.")
 
     logger.info(f"Input directory: {inp_dir}")
@@ -101,6 +100,7 @@ def main(
         ).astype(int)
         fit_params = fit_params.drop(columns="_index")
         fit_params = fit_params.sort_values(["letter", "number"])
+        fit_params = fit_params.drop(columns=["letter", "number"])
         fit_params.to_csv(fit_params_path, index=True)
 
         # keep only 2 signicant digits for temperature index
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/core.py
similarity index 99%
rename from regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
rename to regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/core.py
index e1160e5..79d51f7 100644
--- a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/core.py
+++ b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/core.py
@@ -82,7 +82,7 @@
 
 # MoltenProt is stored in a plain-text file VERSION (also used by setup.py)
 # extract and save it to a variable
-with open(os.path.join(__location__, "RT_CETSA_VERSION")) as version_file:
+with open(os.path.join(__location__, "MOLTENPROT_VERSION")) as version_file:
     __version__ = version_file.read().strip()
 
 # get scipy version (some methods may not be available in earlier versions)
diff --git a/regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/models.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/models.py
similarity index 100%
rename from regression/rt-cetsa-moltprot-tool/src/polus/tabular/regression/rt_cetsa_moltprot/models.py
rename to regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/models.py

From b50a8841136985c158dd536d49b5b2486f0ddbc3 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 06:32:44 -0400
Subject: [PATCH 10/18] feat: review API. Removed filepattern in favor of an optional --intensities filename.

---
 regression/rt-cetsa-moltenprot-tool/ict.yml   | 12 +--
 .../rt-cetsa-moltenprot-tool/plugin.json      | 12 ++-
 .../rt_cetsa_moltenprot.cwl                   |  2 +-
 .../rt_cetsa_moltenprot/__main__.py           | 89 +++++++++----------
 4 files changed, 55 insertions(+), 60 deletions(-)

diff --git a/regression/rt-cetsa-moltenprot-tool/ict.yml b/regression/rt-cetsa-moltenprot-tool/ict.yml
index 5b987c2..3c4b744 100644
--- a/regression/rt-cetsa-moltenprot-tool/ict.yml
+++ b/regression/rt-cetsa-moltenprot-tool/ict.yml
@@ -13,10 +13,10 @@ inputs:
   name: inpDir
   required: true
   type: path
-- description: Filepattern to parse input files
+- description: intensities file
   format:
-  - pattern
-  name: pattern
+  - intensities
+  name: intensities
   required: false
   type: string
 - description: Generate an output preview.
@@ -41,9 +41,9 @@ ui:
   key: inputs.inpDir
   title: Input data collection
   type: path
-- description: Filepattern to parse input files
-  key: inputs.pattern
-  title: pattern
+- description: Intensities file
+  key: inputs.intensities
+  title: intensities
   type: text
 - description: Generate an output preview.
   key: inputs.preview
diff --git a/regression/rt-cetsa-moltenprot-tool/plugin.json b/regression/rt-cetsa-moltenprot-tool/plugin.json
index 8b44a5a..034b34b 100644
--- a/regression/rt-cetsa-moltenprot-tool/plugin.json
+++ b/regression/rt-cetsa-moltenprot-tool/plugin.json
@@ -22,10 +22,9 @@
       "required": true
     },
     {
-      "name": "filePattern",
+      "name": "intensities",
       "type": "string",
-      "description": "File Pattern to parse input files",
-      "default": ".+",
+      "description": "Filename for the intensities file.",
       "required": false
     }
   ],
@@ -43,10 +42,9 @@
       "description": "Input data collection to be processed by this plugin"
     },
     {
-      "key": "inputs.filePattern",
-      "title": "File Pattern",
-      "description": "File Pattern to parse input files",
-      "default": ".+"
+      "key": "inputs.intensities",
+      "title": "Intensities file",
+      "description": "Intensities file"
     }
   ]
 }
diff --git a/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl b/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
index ee45fd6..98f47eb 100644
--- a/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
+++ b/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
@@ -7,7 +7,7 @@ inputs:
     type: Directory
   filePattern:
     inputBinding:
-      prefix: --filePattern
+      prefix: --intensities
     type: string?
   preview:
     inputBinding:
diff --git a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
index cc867ae..283e787 100644
--- a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
+++ b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
@@ -5,7 +5,6 @@
 import os
 import pathlib
 
-import filepattern
 import typer
 from polus.tabular.regression.rt_cetsa_moltenprot import fit_data
 
@@ -33,15 +32,10 @@ def main(
         readable=True,
         resolve_path=True,
     ),
-    pattern: str = typer.Option(
-        ".+",
-        "--filePattern",
-        help="Pattern to match the files in the input directory.",
-    ),
-    preview: bool = typer.Option(
-        False,
-        "--preview",
-        help="Preview the files that will be processed.",
+    int_filename: str = typer.Option(
+        None,
+        "--intensities",
+        help="name of the intensities file (optional).",
     ),
     out_dir: pathlib.Path = typer.Option(
         ...,
@@ -52,61 +46,64 @@ def main(
         writable=True,
         resolve_path=True,
     ),
+    preview: bool = typer.Option(
+        False,
+        "--preview",
+        help="Preview the files that will be processed.",
+    ),
 ) -> None:
     """CLI for rt-cetsa-moltprot-tool."""
     logger.info("Starting the CLI for rt-cetsa-moltprot-tool.")
 
     logger.info(f"Input directory: {inp_dir}")
-    logger.info(f"File Pattern: {pattern}")
     logger.info(f"Output directory: {out_dir}")
 
+    # NOTE we may eventually deal with other types.
     if POLUS_TAB_EXT != ".csv":
         msg = "this tool can currently only process csv files."
         raise ValueError(msg)
 
-    fp = filepattern.FilePattern(inp_dir, pattern)
-    inp_files = [f[1][0] for f in fp()]
-
-    for f in inp_files:
-        if not f.suffix == POLUS_TAB_EXT:
-            raise ValueError(
-                f"this tool can only process {POLUS_TAB_EXT} files. Got {f}",
+    if int_filename is not None:
+        intensities_file = inp_dir / int_filename
+        if not intensities_file.exists():
+            raise FileNotFoundError(intensities_file)
+    else:
+        if len(list(inp_dir.iterdir())) != 1:
+            raise FileExistsError(
+                f"There should be a single intensities file in {inp_dir}",
             )
+        intensities_file = next(inp_dir.iterdir())
+    logger.info(f"Using intensities file: {intensities_file}")
 
     if preview:
-        outputs: list[str] = []
-        for f in inp_files:
-            fit_params_path = f.stem + "_moltenprot_params" + POLUS_TAB_EXT
-            fit_curves_path = f.stem + "_moltenprot_curves" + POLUS_TAB_EXT
-            outputs = [*outputs, fit_params_path, fit_curves_path]
+        outputs = ["params" + POLUS_TAB_EXT, "values" + POLUS_TAB_EXT]
         out_json = {"files": outputs}
         with (out_dir / "preview.json").open("w") as f:
             json.dump(out_json, f, indent=2)
         return
 
-    for f in inp_files:
-        logger.info(f"Processing plate timeserie: {f}")
-        fit = fit_data(f)
-        fit_params_path = out_dir / (f.stem + "_moltenprot_params" + POLUS_TAB_EXT)
-        fit_curves_path = out_dir / (f.stem + "_moltenprot_curves" + POLUS_TAB_EXT)
-
-        # sort fit_params by row/column
-        fit_params = fit.plate_results
-        fit_params["_index"] = fit_params.index
-        fit_params["letter"] = fit_params.apply(lambda row: row._index[:1], axis=1)
-        fit_params["number"] = fit_params.apply(
-            lambda row: row._index[1:],
-            axis=1,
-        ).astype(int)
-        fit_params = fit_params.drop(columns="_index")
-        fit_params = fit_params.sort_values(["letter", "number"])
-        fit_params = fit_params.drop(columns=["letter", "number"])
-        fit_params.to_csv(fit_params_path, index=True)
-
-        # keep only 2 signicant digits for temperature index
-        fit_curves = fit.plate_raw_corr
-        fit_curves.index = fit_curves.index.map(lambda t: round(t, 2))
-        fit_curves.to_csv(fit_curves_path, index=True)
+    fit = fit_data(intensities_file)
+
+    fit_params_path = out_dir / ("params" + POLUS_TAB_EXT)
+    fit_curves_path = out_dir / ("values" + POLUS_TAB_EXT)
+
+    # sort fit_params by row/column
+    fit_params = fit.plate_results
+    fit_params["_index"] = fit_params.index
+    fit_params["letter"] = fit_params.apply(lambda row: row._index[:1], axis=1)
+    fit_params["number"] = fit_params.apply(
+        lambda row: row._index[1:],
+        axis=1,
+    ).astype(int)
+    fit_params = fit_params.drop(columns="_index")
+    fit_params = fit_params.sort_values(["letter", "number"])
+    fit_params = fit_params.drop(columns=["letter", "number"])
+    fit_params.to_csv(fit_params_path, index=True)
+
+    # keep only 2 signicant digits for temperature index
+    fit_curves = fit.plate_raw_corr
+    fit_curves.index = fit_curves.index.map(lambda t: round(t, 2))
+    fit_curves.to_csv(fit_curves_path, index=True)
 
 
 if __name__ == "__main__":

From 15be75388ee0edbcc2a69519c8be25ce71ed642d Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 06:33:20 -0400
Subject: [PATCH 11/18] =?UTF-8?q?Bump=20version:=200.1.0=20=E2=86=92=200.2?=
 =?UTF-8?q?.0-dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg          | 2 +-
 regression/rt-cetsa-moltenprot-tool/README.md                 | 2 +-
 regression/rt-cetsa-moltenprot-tool/VERSION                   | 2 +-
 regression/rt-cetsa-moltenprot-tool/ict.yml                   | 4 ++--
 regression/rt-cetsa-moltenprot-tool/plugin.json               | 4 ++--
 regression/rt-cetsa-moltenprot-tool/pyproject.toml            | 2 +-
 regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl   | 2 +-
 .../polus/tabular/regression/rt_cetsa_moltenprot/__init__.py  | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg b/regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg
index b3d8c10..e58bfbe 100644
--- a/regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg
+++ b/regression/rt-cetsa-moltenprot-tool/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.0
+current_version = 0.2.0-dev0
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
diff --git a/regression/rt-cetsa-moltenprot-tool/README.md b/regression/rt-cetsa-moltenprot-tool/README.md
index 50ad929..4ebd854 100644
--- a/regression/rt-cetsa-moltenprot-tool/README.md
+++ b/regression/rt-cetsa-moltenprot-tool/README.md
@@ -1,4 +1,4 @@
-# RT_CETSA MoltenProt (v0.1.0)
+# RT_CETSA MoltenProt (v0.2.0-dev0)
 
 This WIPP plugin runs moltenprot regression for the RT-CETSA pipeline.
 
diff --git a/regression/rt-cetsa-moltenprot-tool/VERSION b/regression/rt-cetsa-moltenprot-tool/VERSION
index 6e8bf73..ce0f6f8 100644
--- a/regression/rt-cetsa-moltenprot-tool/VERSION
+++ b/regression/rt-cetsa-moltenprot-tool/VERSION
@@ -1 +1 @@
-0.1.0
+0.2.0-dev0
diff --git a/regression/rt-cetsa-moltenprot-tool/ict.yml b/regression/rt-cetsa-moltenprot-tool/ict.yml
index 3c4b744..fee0d6d 100644
--- a/regression/rt-cetsa-moltenprot-tool/ict.yml
+++ b/regression/rt-cetsa-moltenprot-tool/ict.yml
@@ -3,7 +3,7 @@ author:
 - Antoine Gerardin
 - Najib Ishaq
 contact: nick.schaub@nih.gov
-container: polusai/rt-cetsa-moltproten-tool:0.1.0
+container: polusai/rt-cetsa-moltproten-tool:0.2.0-dev0
 description: Run moltenprot regression for the RT-CETSA pipeline.
 entrypoint: python3 -m polus.tabular.regression.rt_cetsa_moltprot
 inputs:
@@ -49,4 +49,4 @@ ui:
   key: inputs.preview
   title: Preview example output of this plugin
   type: checkbox
-version: 0.1.0
+version: 0.2.0-dev0
diff --git a/regression/rt-cetsa-moltenprot-tool/plugin.json b/regression/rt-cetsa-moltenprot-tool/plugin.json
index 034b34b..c55d213 100644
--- a/regression/rt-cetsa-moltenprot-tool/plugin.json
+++ b/regression/rt-cetsa-moltenprot-tool/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "RT-CETSA Moltprot",
-  "version": "0.1.0",
+  "version": "0.2.0-dev0",
   "title": "RT-CETSA Moltprot",
   "description": "Run moltenprot for the RT-CETSA pipeline.",
   "author": "Nicholas Schaub (nick.schaub@nih.gov), Antoine Gerardin (antoine.gerardin@nih.gov), Najib Ishaq (najib.ishaq@nih.gov)",
@@ -8,7 +8,7 @@
   "repository": "https://github.com/PolusAI/tabular-tools",
   "website": "https://ncats.nih.gov/preclinical/core/informatics",
   "citation": "",
-  "containerId": "polusai/rt-cetsa-moltenproten-tool:0.1.0",
+  "containerId": "polusai/rt-cetsa-moltenproten-tool:0.2.0-dev0",
   "baseCommand": [
     "python3",
     "-m",
diff --git a/regression/rt-cetsa-moltenprot-tool/pyproject.toml b/regression/rt-cetsa-moltenprot-tool/pyproject.toml
index 51f1640..ee6d96b 100644
--- a/regression/rt-cetsa-moltenprot-tool/pyproject.toml
+++ b/regression/rt-cetsa-moltenprot-tool/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "polus_tabular_regression_rt_cetsa_moltenprot"
-version = "0.1.0"
+version = "0.2.0-dev0"
 description = "Run moltenprot for the RT-CETSA pipeline."
 authors = [
     "Nick Schaub <nick.schaub@nih.gov>",
diff --git a/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl b/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
index 98f47eb..acae74f 100644
--- a/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
+++ b/regression/rt-cetsa-moltenprot-tool/rt_cetsa_moltenprot.cwl
@@ -24,7 +24,7 @@ outputs:
     type: Directory
 requirements:
   DockerRequirement:
-    dockerPull: polusai/rt-cetsa-moltenprot-tool:0.1.0
+    dockerPull: polusai/rt-cetsa-moltenprot-tool:0.2.0-dev0
   InitialWorkDirRequirement:
     listing:
     - entry: $(inputs.outDir)
diff --git a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
index 62b4e6c..c9c9edf 100644
--- a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
+++ b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
@@ -1,6 +1,6 @@
 """RT_CETSA Moltprot Regression Tool."""
 
-__version__ = "0.1.0"
+__version__ = "0.2.0-dev0"
 
 import os
 import pathlib

From 74e7928cc15d63b07c9c61eb3ace71efe4f4beab Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 06:38:15 -0400
Subject: [PATCH 12/18] fix: add version in __init__.py

---
 .../src/polus/tabular/regression/rt_cetsa_analysis/__init__.py  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
index 98fb3e2..a4634cf 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
@@ -1 +1,3 @@
 """RT CETSA Analysis."""
+
+__version__ = "0.1.0"

From a6f05046a0a76d327d8cd26c19ef90a3104a7e83 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Tue, 21 May 2024 06:38:19 -0400
Subject: [PATCH 13/18] =?UTF-8?q?Bump=20version:=200.1.0=20=E2=86=92=200.2?=
 =?UTF-8?q?.0-dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 regression/rt-cetsa-analysis-tool/.bumpversion.cfg            | 2 +-
 regression/rt-cetsa-analysis-tool/README.md                   | 2 +-
 regression/rt-cetsa-analysis-tool/VERSION                     | 2 +-
 regression/rt-cetsa-analysis-tool/ict.yml                     | 4 ++--
 regression/rt-cetsa-analysis-tool/plugin.json                 | 4 ++--
 regression/rt-cetsa-analysis-tool/pyproject.toml              | 2 +-
 regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl       | 2 +-
 .../polus/tabular/regression/rt_cetsa_analysis/__init__.py    | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/regression/rt-cetsa-analysis-tool/.bumpversion.cfg b/regression/rt-cetsa-analysis-tool/.bumpversion.cfg
index 3bc5c4c..daa7920 100644
--- a/regression/rt-cetsa-analysis-tool/.bumpversion.cfg
+++ b/regression/rt-cetsa-analysis-tool/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.1.0
+current_version = 0.2.0-dev0
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))?
diff --git a/regression/rt-cetsa-analysis-tool/README.md b/regression/rt-cetsa-analysis-tool/README.md
index acde158..87fd543 100644
--- a/regression/rt-cetsa-analysis-tool/README.md
+++ b/regression/rt-cetsa-analysis-tool/README.md
@@ -1,4 +1,4 @@
-# RT_CETSA Analysis Tool (v0.1.0)
+# RT_CETSA Analysis Tool (v0.2.0-dev0)
 
 This WIPP plugin runs statistical analysis for the RT-CETSA pipeline.
 
diff --git a/regression/rt-cetsa-analysis-tool/VERSION b/regression/rt-cetsa-analysis-tool/VERSION
index 6e8bf73..ce0f6f8 100644
--- a/regression/rt-cetsa-analysis-tool/VERSION
+++ b/regression/rt-cetsa-analysis-tool/VERSION
@@ -1 +1 @@
-0.1.0
+0.2.0-dev0
diff --git a/regression/rt-cetsa-analysis-tool/ict.yml b/regression/rt-cetsa-analysis-tool/ict.yml
index 353fc84..86dc427 100644
--- a/regression/rt-cetsa-analysis-tool/ict.yml
+++ b/regression/rt-cetsa-analysis-tool/ict.yml
@@ -3,7 +3,7 @@ author:
 - Antoine Gerardin
 - Najib Ishaq
 contact: nick.schaub@nih.gov
-container: polusai/rt-cetsa-analysis-simple-tool:0.1.0
+container: polusai/rt-cetsa-analysis-simple-tool:0.2.0-dev0
 description: Run statisical analysis for the RT-CETSA pipeline.
 entrypoint: python3 -m polus.tabular.regression.rt_cetsa_analysis
 inputs:
@@ -59,4 +59,4 @@ ui:
   key: inputs.preview
   title: Preview example output of this plugin
   type: checkbox
-version: 0.1.0
+version: 0.2.0-dev0
diff --git a/regression/rt-cetsa-analysis-tool/plugin.json b/regression/rt-cetsa-analysis-tool/plugin.json
index f16fd81..7f25700 100644
--- a/regression/rt-cetsa-analysis-tool/plugin.json
+++ b/regression/rt-cetsa-analysis-tool/plugin.json
@@ -1,6 +1,6 @@
 {
   "name": "RT-CETSA Analysis",
-  "version": "0.1.0",
+  "version": "0.2.0-dev0",
   "title": "RT-CETSA Analysis",
   "description": "Run statistical analysis for the RT-CETSA pipeline.",
   "author": "Nicholas Schaub (nick.schaub@nih.gov), Antoine Gerardin (antoine.gerardin@nih.gov), Najib Ishaq (najib.ishaq@nih.gov)",
@@ -8,7 +8,7 @@
   "repository": "https://github.com/PolusAI/tabular-tools",
   "website": "https://ncats.nih.gov/preclinical/core/informatics",
   "citation": "",
-  "containerId": "polusai/rt-cetsa-analysis-simple-tool:0.1.0",
+  "containerId": "polusai/rt-cetsa-analysis-simple-tool:0.2.0-dev0",
   "baseCommand": [
     "python3",
     "-m",
diff --git a/regression/rt-cetsa-analysis-tool/pyproject.toml b/regression/rt-cetsa-analysis-tool/pyproject.toml
index 323a14f..355a020 100644
--- a/regression/rt-cetsa-analysis-tool/pyproject.toml
+++ b/regression/rt-cetsa-analysis-tool/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "polus_tabular_regression_rt_cetsa_analysis"
-version = "0.1.0"
+version = "0.2.0-dev0"
 description = "Run statiscal analysis for the RT-CETSA pipeline."
 authors = [
     "Nick Schaub <nick.schaub@nih.gov>",
diff --git a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
index 149b3a3..ad3390f 100644
--- a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
+++ b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
@@ -36,7 +36,7 @@ requirements:
     envDef:
       WORKDIR: /opt/executables/
   DockerRequirement:
-    dockerPull: polusai/rt-cetsa-analysis-simple-tool:0.1.0
+    dockerPull: polusai/rt-cetsa-analysis-simple-tool:0.2.0-dev0
   InitialWorkDirRequirement:
     listing:
     - entry: $(inputs.outDir)
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
index a4634cf..bb098b4 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__init__.py
@@ -1,3 +1,3 @@
 """RT CETSA Analysis."""
 
-__version__ = "0.1.0"
+__version__ = "0.2.0-dev0"

From ec31dd9bd07e65677ede85a78c711d71102bc15b Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Wed, 22 May 2024 04:30:42 -0400
Subject: [PATCH 14/18] chore: refactor code for dashboard integration.

---
 .../rt_cetsa_moltenprot/__init__.py           | 34 ++++++++++++++++++-
 .../rt_cetsa_moltenprot/__main__.py           | 19 ++---------
 2 files changed, 35 insertions(+), 18 deletions(-)

diff --git a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
index c9c9edf..e3454f1 100644
--- a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
+++ b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__init__.py
@@ -16,7 +16,39 @@
 POLUS_TAB_EXT = os.environ.get("POLUS_TAB_EXT", ".csv")
 
 
-def fit_data(file_path: pathlib.Path) -> pandas.DataFrame:
+def run_moltenprot_fit(
+    file_path: pathlib.Path,
+) -> tuple[pandas.DataFrame, pandas.DataFrame]:
+    """Run moltenprot.
+
+    Args:
+        file_path : path to intensities file.
+
+    Returns:
+        tuple of dataframe containing the fit_params and the curve values.
+    """
+    fit = fit_data(file_path)
+
+    # sort fit_params by row/column
+    fit_params = fit.plate_results
+    fit_params["_index"] = fit_params.index
+    fit_params["letter"] = fit_params.apply(lambda row: row._index[:1], axis=1)
+    fit_params["number"] = fit_params.apply(
+        lambda row: row._index[1:],
+        axis=1,
+    ).astype(int)
+    fit_params = fit_params.drop(columns="_index")
+    fit_params = fit_params.sort_values(["letter", "number"])
+    fit_params = fit_params.drop(columns=["letter", "number"])
+
+    # keep only 2 decimal places for temperature index
+    fit_curves = fit.plate_raw_corr
+    fit_curves.index = fit_curves.index.map(lambda t: round(t, 2))
+
+    return fit_params, fit_curves
+
+
+def fit_data(file_path: pathlib.Path) -> core.MoltenProtFit:
     """Fit data to a model using Moltprot."""
     fit = core.MoltenProtFit(
         filename=file_path,
diff --git a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
index 283e787..5cd9c49 100644
--- a/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
+++ b/regression/rt-cetsa-moltenprot-tool/src/polus/tabular/regression/rt_cetsa_moltenprot/__main__.py
@@ -6,7 +6,7 @@
 import pathlib
 
 import typer
-from polus.tabular.regression.rt_cetsa_moltenprot import fit_data
+from polus.tabular.regression.rt_cetsa_moltenprot import run_moltenprot_fit
 
 # Initialize the logger
 logging.basicConfig(
@@ -82,27 +82,12 @@ def main(
             json.dump(out_json, f, indent=2)
         return
 
-    fit = fit_data(intensities_file)
+    fit_params, fit_curves = run_moltenprot_fit(intensities_file)
 
     fit_params_path = out_dir / ("params" + POLUS_TAB_EXT)
     fit_curves_path = out_dir / ("values" + POLUS_TAB_EXT)
 
-    # sort fit_params by row/column
-    fit_params = fit.plate_results
-    fit_params["_index"] = fit_params.index
-    fit_params["letter"] = fit_params.apply(lambda row: row._index[:1], axis=1)
-    fit_params["number"] = fit_params.apply(
-        lambda row: row._index[1:],
-        axis=1,
-    ).astype(int)
-    fit_params = fit_params.drop(columns="_index")
-    fit_params = fit_params.sort_values(["letter", "number"])
-    fit_params = fit_params.drop(columns=["letter", "number"])
     fit_params.to_csv(fit_params_path, index=True)
-
-    # keep only 2 signicant digits for temperature index
-    fit_curves = fit.plate_raw_corr
-    fit_curves.index = fit_curves.index.map(lambda t: round(t, 2))
     fit_curves.to_csv(fit_curves_path, index=True)
 
 

From e17bb38b0af582bf2680e5f7cf933d07537420d4 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Wed, 22 May 2024 06:29:44 -0400
Subject: [PATCH 15/18] dev: add tests to satisfy git actions

---
 .../rt-cetsa-moltenprot-tool/tests/__init__.py  |  1 +
 .../rt-cetsa-moltenprot-tool/tests/conftest.py  | 13 +++++++++++++
 .../tests/test_moltenprot.py                    | 17 +++++++++++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 regression/rt-cetsa-moltenprot-tool/tests/__init__.py
 create mode 100644 regression/rt-cetsa-moltenprot-tool/tests/conftest.py
 create mode 100644 regression/rt-cetsa-moltenprot-tool/tests/test_moltenprot.py

diff --git a/regression/rt-cetsa-moltenprot-tool/tests/__init__.py b/regression/rt-cetsa-moltenprot-tool/tests/__init__.py
new file mode 100644
index 0000000..d420712
--- /dev/null
+++ b/regression/rt-cetsa-moltenprot-tool/tests/__init__.py
@@ -0,0 +1 @@
+"""Tests."""
diff --git a/regression/rt-cetsa-moltenprot-tool/tests/conftest.py b/regression/rt-cetsa-moltenprot-tool/tests/conftest.py
new file mode 100644
index 0000000..4983315
--- /dev/null
+++ b/regression/rt-cetsa-moltenprot-tool/tests/conftest.py
@@ -0,0 +1,13 @@
+"""Set up."""
+import pytest
+
+
+def pytest_addoption(parser: pytest.Parser) -> None:
+    """Add options to pytest."""
+    parser.addoption(
+        "--slow",
+        action="store_true",
+        dest="slow",
+        default=False,
+        help="run slow tests",
+    )
diff --git a/regression/rt-cetsa-moltenprot-tool/tests/test_moltenprot.py b/regression/rt-cetsa-moltenprot-tool/tests/test_moltenprot.py
new file mode 100644
index 0000000..80816cc
--- /dev/null
+++ b/regression/rt-cetsa-moltenprot-tool/tests/test_moltenprot.py
@@ -0,0 +1,17 @@
+"""Tests."""
+from polus.tabular.regression.rt_cetsa_moltenprot import run_moltenprot_fit
+from pathlib import Path
+
+import pytest
+
+
+@pytest.mark.skipif("not config.getoption('slow')")
+def test_moltenprot():
+    path = Path(__file__).parent / "data" / "plate_(1-58).csv"
+    params, values = run_moltenprot_fit(path)
+    assert params is not None
+    assert values is not None
+
+
+def test_dummy_test():
+    pass

From d54b950af2ad348adfc95e21b7fc69fe658b9376 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Wed, 22 May 2024 08:57:38 -0400
Subject: [PATCH 16/18] fix: values and params are not required.

---
 regression/rt-cetsa-analysis-tool/build-docker.sh             | 2 +-
 regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl       | 4 ++--
 .../polus/tabular/regression/rt_cetsa_analysis/__main__.py    | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/regression/rt-cetsa-analysis-tool/build-docker.sh b/regression/rt-cetsa-analysis-tool/build-docker.sh
index aaecd7a..93d9b45 100755
--- a/regression/rt-cetsa-analysis-tool/build-docker.sh
+++ b/regression/rt-cetsa-analysis-tool/build-docker.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
 version=$(<VERSION)
-docker build . -t polusai/rt-cetsa-analysis-simple-tool:"${version}"
+docker build . -t polusai/rt-cetsa-analysis-tool:"${version}"
diff --git a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
index ad3390f..c8f4fc9 100644
--- a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
+++ b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
@@ -9,11 +9,11 @@ inputs:
   params:
     inputBinding:
       prefix: --params
-    type: string
+    type: string?
   values:
     inputBinding:
       prefix: --values
-    type: string
+    type: string?
   platemap:
     inputBinding:
       prefix: --platemap
diff --git a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
index e4af4bb..8ae52c9 100644
--- a/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
+++ b/regression/rt-cetsa-analysis-tool/src/polus/tabular/regression/rt_cetsa_analysis/__main__.py
@@ -35,12 +35,12 @@ def main(
         resolve_path=True,
     ),
     params_filename: str = typer.Option(
-        ...,
+        None,
         "--params",
         help="name of the moltenprot fit params csv file in the input directory.",
     ),
     values_filename: str = typer.Option(
-        ...,
+        None,
         "--values",
         help="name of the moltenprot baseline corrected values csv file in the input directory.",
     ),

From 2f272be4fabe79691945ed7ec38cd108de5be9a3 Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Wed, 22 May 2024 08:58:09 -0400
Subject: [PATCH 17/18] fix: values and params are not required.

---
 regression/rt-cetsa-analysis-tool/ict.yml     | 4 ++--
 regression/rt-cetsa-analysis-tool/plugin.json | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/regression/rt-cetsa-analysis-tool/ict.yml b/regression/rt-cetsa-analysis-tool/ict.yml
index 86dc427..1918ef8 100644
--- a/regression/rt-cetsa-analysis-tool/ict.yml
+++ b/regression/rt-cetsa-analysis-tool/ict.yml
@@ -11,13 +11,13 @@ inputs:
   format:
   - params
   name: params
-  required: true
+  required: false
   type: string
 - description: Baseline corrected csv files
   format:
   - values
   name: values
-  required: true
+  required: false
   type: string
 - description: platemap excel file
   format:
diff --git a/regression/rt-cetsa-analysis-tool/plugin.json b/regression/rt-cetsa-analysis-tool/plugin.json
index 7f25700..0bbb6b9 100644
--- a/regression/rt-cetsa-analysis-tool/plugin.json
+++ b/regression/rt-cetsa-analysis-tool/plugin.json
@@ -18,14 +18,12 @@
     {
       "name": "params",
       "type": "genericData",
-      "description": "Fit params csv file",
-      "required": true
+      "description": "Fit params csv file"
     },
     {
       "name": "values",
       "type": "genericData",
-      "description": "Baseline corrected csv files",
-      "required": true
+      "description": "Baseline corrected csv files"
     },
     {
       "name": "platemap",

From bff4e9d4df644f55c8daab15dc7022901d65413d Mon Sep 17 00:00:00 2001
From: agerardin <antoine.gerardin@gmail.com>
Date: Wed, 22 May 2024 12:00:05 -0400
Subject: [PATCH 18/18] fix cwl.

---
 regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
index c8f4fc9..fed422f 100644
--- a/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
+++ b/regression/rt-cetsa-analysis-tool/rt_cetsa_analysis.cwl
@@ -36,7 +36,7 @@ requirements:
     envDef:
       WORKDIR: /opt/executables/
   DockerRequirement:
-    dockerPull: polusai/rt-cetsa-analysis-simple-tool:0.2.0-dev0
+    dockerPull: polusai/rt-cetsa-analysis-tool:0.2.0-dev0
   InitialWorkDirRequirement:
     listing:
     - entry: $(inputs.outDir)