Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
2f9d3b8
per n epoch snapshot tsne
CerenPaja Nov 5, 2025
4e13749
per n epoch snaphot, error
CerenPaja Nov 5, 2025
2c52f64
snapshot tsne script with errors
CerenPaja Nov 6, 2025
281f6b6
fix parmaeter
dn070017 Nov 11, 2025
7d36a71
per epoch and prior prints
CerenPaja Nov 27, 2025
dd8eafe
Bug Fix v1
CerenPaja Dec 2, 2025
97d7962
Bug Fix v2
CerenPaja Dec 2, 2025
30ef9af
Bug Fix v3
CerenPaja Dec 2, 2025
6af0aee
Bug Fix v4 fin
CerenPaja Dec 2, 2025
0ef959c
Bug Fix v5 fin
CerenPaja Dec 4, 2025
43d7d52
Bug Fix v6 API
CerenPaja Dec 8, 2025
85b3574
Bug Fix v7 API
CerenPaja Dec 8, 2025
29db121
Updated from develop branch plus my adds PeriodicEpoch and Grid Prior
CerenPaja Jan 9, 2026
61b7e51
disable kl and other add-ins
CerenPaja Jan 30, 2026
7b00112
Vanilla + GMM KL
Feb 3, 2026
73bbe7d
Vanilla + GMM + PeriodicTSNE
CerenPaja Feb 3, 2026
caee09c
Vanilla + GMM + PeriodicTSNE smll fix
CerenPaja Feb 5, 2026
9c49869
Fix exploding gradients and PeriodicTSNECallback
Feb 10, 2026
f9f1225
Student-t dist added to Vanilla + GMM KL
Feb 16, 2026
d592b6f
Student-t dist added to Vanilla + GMM KL + BugFix
Feb 17, 2026
4a6c407
Annealing between vanilla and gmm
Mar 11, 2026
179b382
Annealing Fix v1
CerenPaja Mar 12, 2026
d8ba296
Crossfade Annealing works
CerenPaja Mar 26, 2026
b533088
Kmeans++ Seeding for prior initialization
CerenPaja Mar 26, 2026
9777666
Kmeans and 3 Phases KL
CerenPaja Apr 9, 2026
8d5c16c
3 phases merged into one big annealing
CerenPaja Apr 9, 2026
b9c5235
Per Sample Dispersion
CerenPaja Apr 14, 2026
d6c3e59
New KLAnnealing and Kmeans added to multi-modal architecture
CerenPaja Apr 27, 2026
c3d4305
fix merge conflict
dn070017 Apr 30, 2026
a68ffe2
New fixes on scheduler
CerenPaja May 3, 2026
a51990d
add encode and decode functions
dn070017 May 7, 2026
23a1d84
use pixi to manage environment
dn070017 May 7, 2026
9fd557e
Merge pull request #5 from dn070017/feature/layered_io
CerenPaja May 8, 2026
5302357
Clustering Assignment for z_hat
CerenPaja May 13, 2026
b1969fb
Changes made according to comments
CerenPaja May 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,6 @@ Thumbs.db
*.html
*.png
mlruns
outputs
outputs
# pixi environments
.pixi/*
!.pixi/config.toml
33 changes: 33 additions & 0 deletions RunDNAm_small.sh
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[MUST] Scripts related to HPC should be moved into a dedicated directory, e.g. `scripts/` or `scripts/sbatch/`.

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#SBATCH --job-name=Bulk_DNAmethylation
#SBATCH --account=project_2015212
#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100:1
#SBATCH --cpus-per-task=8
#SBATCH --mem=160G
#SBATCH --time=05:00:00
#SBATCH --output=/scratch/project_2015212/ceren/runs/bulk/%x-%j.out
#SBATCH --error=/scratch/project_2015212/ceren/runs/bulk/%x-%j.err

# SLURM batch job: runs the CAVACHON workflow described by
# DNAmethyl_small_run.yaml on one V100 GPU with 8 CPU cores.

# Abort on the first failing command, on unset variables and on
# failures inside pipelines.
set -euo pipefail

module load tensorflow/2.18
source /projappl/project_2015212/cavachon/envs/ceren/.venv/bin/activate

export MLFLOW_TRACKING_URI="file:///scratch/project_2015212/ceren/mlruns"

# SLURM_CPUS_PER_TASK is only defined inside a SLURM allocation that
# requested --cpus-per-task. Fall back to a single thread so that
# `set -u` does not abort the script when it is run directly
# (e.g. `bash RunDNAm_small.sh` while debugging).
NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"
export OMP_NUM_THREADS="${NUM_THREADS}"
export MKL_NUM_THREADS="${NUM_THREADS}"
export PYTHONUNBUFFERED=1

# Make sure key dirs exist
# NOTE(review): the --output/--error directory above is presumably
# needed by SLURM before this script starts, so runs/bulk itself may
# have to exist at submission time -- confirm on the cluster.
mkdir -p /scratch/project_2015212/ceren/runs/bulk/embeddings
mkdir -p /scratch/project_2015212/ceren/checkpoints

cd /projappl/project_2015212/cavachon/CAVACHON

# Run the workflow from an inline Python program; the quoted 'PY'
# delimiter keeps the shell from expanding anything inside it.
python - << 'PY'
from cavachon.workflow import Workflow
CFG = "/projappl/project_2015212/cavachon/configs/ceren/DNAmethyl_small_run.yaml"
wf = Workflow(CFG)
wf.run()
PY
33 changes: 33 additions & 0 deletions RunDNAm_small2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#SBATCH --job-name=Bulk_DNAmethylation
#SBATCH --account=project_2015212
#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100:1
#SBATCH --cpus-per-task=8
#SBATCH --mem=90G
#SBATCH --time=01:00:00
#SBATCH --output=/scratch/project_2015212/ceren/runs/bulk2/%x-%j.out
#SBATCH --error=/scratch/project_2015212/ceren/runs/bulk2/%x-%j.err

# SLURM batch job: runs the CAVACHON workflow described by
# DNAm_second.yaml on one V100 GPU with 8 CPU cores.

# Abort on the first failing command, on unset variables and on
# failures inside pipelines.
set -euo pipefail

module load tensorflow/2.18
source /projappl/project_2015212/cavachon/envs/ceren/.venv/bin/activate

export MLFLOW_TRACKING_URI="file:///scratch/project_2015212/ceren/mlruns2"

# SLURM_CPUS_PER_TASK is only defined inside a SLURM allocation that
# requested --cpus-per-task. Fall back to a single thread so that
# `set -u` does not abort the script when it is run directly
# (e.g. `bash RunDNAm_small2.sh` while debugging).
NUM_THREADS="${SLURM_CPUS_PER_TASK:-1}"
export OMP_NUM_THREADS="${NUM_THREADS}"
export MKL_NUM_THREADS="${NUM_THREADS}"
export PYTHONUNBUFFERED=1

# Make sure key dirs exist
# NOTE(review): the --output/--error directory above is presumably
# needed by SLURM before this script starts, so runs/bulk2 itself may
# have to exist at submission time -- confirm on the cluster.
mkdir -p /scratch/project_2015212/ceren/runs/bulk2/embeddings
mkdir -p /scratch/project_2015212/ceren/checkpoints2

cd /projappl/project_2015212/cavachon/CAVACHON

# Run the workflow from an inline Python program; the quoted 'PY'
# delimiter keeps the shell from expanding anything inside it.
python - << 'PY'
from cavachon.workflow import Workflow
CFG = "/projappl/project_2015212/cavachon/configs/ceren/DNAm_second.yaml"
wf = Workflow(CFG)
wf.run()
PY
18 changes: 12 additions & 6 deletions cavachon/dataloader/modifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
from .independent_bernoulli_data_modifier import (
IndependentBernoulliDataModifier as IndependentBernoulliDataModifier,
)
from .independent_zero_inflated_negative_binomial_data_modifier import (
IndependentZeroInflatedNegativeBinomialDataModifier as IndependentZeroInflatedNegativeBinomialDataModifier,
)
from .independent_bernoulli_data_modifier import (
IndependentBernoulliDataModifier as IndependentBernoulliDataModifier,
)
from .independent_zero_inflated_negative_binomial_data_modifier import (
IndependentZeroInflatedNegativeBinomialDataModifier as IndependentZeroInflatedNegativeBinomialDataModifier,
)
from .multivariate_normal_diag_data_modifier import (
MultivariateNormalDiagDataModifier as MultivariateNormalDiagDataModifier,
)
from .studentt_data_modifier import (
StudenttDataModifier as StudenttDataModifier,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import functools
from typing import Any, Mapping

import tensorflow as tf

from cavachon.environment.constants import Constants
from cavachon.layers.modifiers.to_dense import ToDense


class MultivariateNormalDiagDataModifier(tf.keras.Model):
    """MultivariateNormalDiagDataModifier

    Data modifier for a modality modelled with a MultivariateNormalDiag
    distribution (Normal distribution with diagonal covariance). It is
    meant to be applied to the batches produced by the tf.data.Dataset
    that the DataLoader creates.

    Attributes
    ----------
    modality_name: str
        name of the modality handled by this modifier.

    modality_key: str
        key under which the modality is stored in the mapping produced
        by the tf.data.Dataset. Built as
        `modality_name`_`Constants.TENSOR_NAME_X`.

    modifiers: List[tf.keras.layers.Layer]
        layers applied, in order, to the mapping produced by the
        tf.data.Dataset. Set to [ToDense(modality_key)].

    See Also
    --------
    DataLoader: used to create tf.data.Dataset from MuData.

    """

    def __init__(self, modality_name: str):
        """Constructor for MultivariateNormalDiag data modifier

        Parameters
        ----------
        modality_name: str
            name of the modality this modifier is responsible for.
        """
        super().__init__()
        tensor_key = f"{modality_name}_{Constants.TENSOR_NAME_X}"
        self.modality_name: str = modality_name
        self.modality_key: str = tensor_key
        # Continuous, already-normalized inputs (CNV, normalized RNA,
        # etc.) need no further transformation: turning the sparse
        # matrix into a dense tensor is the only step.
        self.modifiers = [ToDense(tensor_key)]

    def call(self, inputs: Mapping[Any, tf.Tensor], training=None, mask=None):
        """Run every modifier, in order, on the incoming batch.

        Parameters
        ----------
        inputs:
            Mapping of tf.Tensor whose keys include self.modality_key.

        training: bool, optional
            Ignored; present only to satisfy the tf.keras.Model API.

        mask: tf.Tensor, optional
            Ignored; present only to satisfy the tf.keras.Model API.

        Returns
        -------
        Mapping[Any, tf.Tensor]
            the batch after all modifiers have been applied.

        """
        processed = inputs
        # Each modifier receives the output of the previous one.
        for layer in self.modifiers:
            processed = layer(processed)
        return processed
17 changes: 17 additions & 0 deletions cavachon/dataloader/modifiers/studentt_data_modifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import functools
from typing import Any, Mapping

import tensorflow as tf

from cavachon.environment.constants import Constants
from cavachon.layers.modifiers.to_dense import ToDense


class StudenttDataModifier(tf.keras.Model):
    """StudenttDataModifier

    Data modifier for a modality modelled with a Student-t
    distribution. It applies its modifiers, in order, to the mapping
    of tensors it is called with.

    Attributes
    ----------
    modality_name: str
        name of the modality handled by this modifier.

    modality_key: str
        key under which the modality is stored in the input mapping.
        Built as `modality_name`_`Constants.TENSOR_NAME_X`.

    modifiers: List[tf.keras.layers.Layer]
        layers applied, in order, to the input mapping. Set to
        [ToDense(modality_key)].

    """

    def __init__(self, modality_name: str):
        """Constructor for Student-t data modifier

        Parameters
        ----------
        modality_name: str
            name of the modality this modifier is responsible for.
        """
        super().__init__()
        # Kept as an attribute so this class exposes the same fields as
        # MultivariateNormalDiagDataModifier.
        self.modality_name: str = modality_name
        self.modality_key: str = f"{modality_name}_{Constants.TENSOR_NAME_X}"
        self.modifiers = [ToDense(self.modality_key)]

    def call(self, inputs: Mapping[Any, tf.Tensor], **kwargs):
        """Run every modifier, in order, on the incoming batch.

        Parameters
        ----------
        inputs:
            Mapping of tf.Tensor whose keys include self.modality_key.

        **kwargs:
            Accepted and ignored.

        Returns
        -------
        Mapping[Any, tf.Tensor]
            the batch after all modifiers have been applied.

        """
        return functools.reduce(lambda x, mod: mod(x), self.modifiers, inputs)
3 changes: 3 additions & 0 deletions cavachon/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,6 @@
from .multivariate_normal_diag_distribution import (
MultivariateNormalDiagDistribution as MultivariateNormalDiagDistribution,
)
from .studentt_distribution import (
StudenttDistribution as StudenttDistribution,
)
5 changes: 3 additions & 2 deletions cavachon/distributions/distribution.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from abc import ABC, abstractclassmethod
from abc import ABC, abstractmethod
from typing import Mapping, Union

import tensorflow as tf
Expand All @@ -11,7 +11,8 @@ class Distribution(ABC):

"""

@abstractclassmethod
@classmethod
@abstractmethod
def from_parameterizer_output(
cls, params: Union[tf.Tensor, Mapping[str, tf.Tensor]], **kwargs
) -> tfp.distributions.Distribution:
Expand Down
29 changes: 29 additions & 0 deletions cavachon/distributions/studentt_distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import tensorflow as tf
import tensorflow_probability as tfp

from cavachon.distributions.distribution import Distribution


class StudenttDistribution(Distribution, tfp.distributions.StudentT):
    """StudentT distribution for continuous data with heavy tails (e.g. CNV).

    Combines the project's Distribution interface with
    tfp.distributions.StudentT so an instance can be built directly
    from a parameterizer output tensor.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to the parent constructor."""
        super().__init__(*args, **kwargs)

    @classmethod
    def from_parameterizer_output(cls, params: tf.Tensor, **kwargs):
        """Build the distribution from a single parameter tensor.

        Parameters
        ----------
        params: tf.Tensor
            tensor whose last axis is split into three equally sized
            chunks, read in order as loc, unconstrained scale and
            unconstrained degrees of freedom.

        **kwargs:
            forwarded unchanged to the class constructor.

        Returns
        -------
        StudenttDistribution
            instance created with the constrained df, loc and scale.

        """
        # Three equal slices along the last axis.
        raw_loc, raw_scale, raw_df = tf.split(params, 3, axis=-1)

        # Softplus maps to positive values; the small offset keeps the
        # scale (sigma) away from zero.
        positive_scale = tf.math.softplus(raw_scale) + 1e-7

        # Softplus is positive, so the offset of 2.0 places the degrees
        # of freedom (nu) in the range where the variance is defined.
        degrees_of_freedom = tf.math.softplus(raw_df) + 2.0

        return cls(
            df=degrees_of_freedom,
            loc=raw_loc,
            scale=positive_scale,
            **kwargs,
        )
6 changes: 6 additions & 0 deletions cavachon/layers/parameterizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,9 @@
from .multivariate_normal_diag_sampler import (
MultivariateNormalDiagSampler as MultivariateNormalDiagSampler,
)
from .studentt_parameterizer_layer import (
StudenttParameterizerLayer as StudenttParameterizerLayer,
)
from .studentt_sampler import (
StudenttSampler as StudenttSampler,
)
Loading
Loading