diff --git a/2D_Classifier/README.md b/2D_Classifier/README.md deleted file mode 100644 index d3f076d..0000000 --- a/2D_Classifier/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# 2D Classifier - -## Introduction -The template provides a starting point with helper classes and functions to facilitate rapid development and deployment of applications for 2D Classifiers. - -## Layout - -### There are 3 sections: - -#### 1. app -This area is used to work on the deployment of the application and provides helper frameworks. - -#### 2. tests -This area holds unit tests for both app and train. - -#### 3. train -This area is used to work on the training and creation of AI models - -*Further information can be found within each folder.* - - -## Examples - -https://github.com/GSTT-CSC/wrist-fracture-x-ray - -For further information on MLOps please refer to the MLOps repo: - -https://github.com/GSTT-CSC/MLOps - diff --git a/2D_Classifier/train/requirements.txt b/2D_Classifier/train/requirements.txt deleted file mode 100644 index d24b654..0000000 --- a/2D_Classifier/train/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -csc-mlops -torch -torchmetrics -torchvision -pytorch_lightning -ray -timm -pytest -pytest-cov -flake8 diff --git a/2D_Classifier/app/__init__.py b/project/app/__init__.py similarity index 100% rename from 2D_Classifier/app/__init__.py rename to project/app/__init__.py diff --git a/2D_Classifier/tests/__init__.py b/project/app/app_tests/__init__.py similarity index 100% rename from 2D_Classifier/tests/__init__.py rename to project/app/app_tests/__init__.py diff --git a/2D_Classifier/train/Dockerfile b/project/classifier_2d/Dockerfile similarity index 84% rename from 2D_Classifier/train/Dockerfile rename to project/classifier_2d/Dockerfile index 2a7e158..0d1d35c 100644 --- a/2D_Classifier/train/Dockerfile +++ b/project/classifier_2d/Dockerfile @@ -5,7 +5,7 @@ WORKDIR /train ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && \ - apt-get install -y 
--no-install-recommends build-essential git rsync software-properties-common ffmpeg libsm6 libxext6 && \ + apt-get install -y --no-install-recommends build-essential git rsync ffmpeg libsm6 libxext6 && \ rm -rf /var/lib/apt/lists/* ENV PYTHONPATH="/mlflow/projects/code/:$PYTHONPATH" diff --git a/2D_Classifier/train/README.md b/project/classifier_2d/README.md similarity index 98% rename from 2D_Classifier/train/README.md rename to project/classifier_2d/README.md index a8304db..0364a0f 100644 --- a/2D_Classifier/train/README.md +++ b/project/classifier_2d/README.md @@ -1,3 +1,5 @@ +# 2D Classifier THIS NEEDS UPDATING + ## Example Workflow: #### 1. Adapt XNATDataImport.py for your data @@ -58,3 +60,5 @@ This dockerfile sets up the Docker image that the MLOps run will utilise. In the example this is just a simple environment running python version 3.10. You will most likely need to adapt this for your project. + + diff --git a/2D_Classifier/tests/app_tests/__init__.py b/project/classifier_2d/__init__.py similarity index 100% rename from 2D_Classifier/tests/app_tests/__init__.py rename to project/classifier_2d/__init__.py diff --git a/2D_Classifier/tests/train_tests/__init__.py b/project/classifier_2d/config/__init__.py similarity index 100% rename from 2D_Classifier/tests/train_tests/__init__.py rename to project/classifier_2d/config/__init__.py diff --git a/2D_Classifier/train/config/config.cfg b/project/classifier_2d/config/config.cfg similarity index 100% rename from 2D_Classifier/train/config/config.cfg rename to project/classifier_2d/config/config.cfg diff --git a/2D_Classifier/train/config/local_config.cfg b/project/classifier_2d/config/local_config.cfg similarity index 100% rename from 2D_Classifier/train/config/local_config.cfg rename to project/classifier_2d/config/local_config.cfg diff --git a/2D_Classifier/train/scripts/__init__.py b/project/classifier_2d/mlops/__init__.py similarity index 100% rename from 2D_Classifier/train/scripts/__init__.py rename to 
project/classifier_2d/mlops/__init__.py diff --git a/2D_Classifier/train/scripts/train.py b/project/classifier_2d/mlops/train.py similarity index 89% rename from 2D_Classifier/train/scripts/train.py rename to project/classifier_2d/mlops/train.py index c38fbc1..3e360ab 100644 --- a/2D_Classifier/train/scripts/train.py +++ b/project/classifier_2d/mlops/train.py @@ -1,21 +1,18 @@ -import sys import configparser +import json import logging -import os import multiprocessing -import json +import os import mlflow import pytorch_lightning as pl import torch -from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint -from ray.air.integrations.mlflow import setup_mlflow +from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor, ModelCheckpoint from torch.cuda import is_available as cuda_available -from src.DataModule import DataModule -from src.Network import Network -from src.DataModule import label_dict -from src.XNATDataImport import XNATDataImport +from src.data_import_xnat import DataImportXNAT +from src.datamodule import DataModule, label_dict +from src.network import Network logger = logging.getLogger(__name__) @@ -36,7 +33,7 @@ def train(config): else multiprocessing.cpu_count() ) - importer = XNATDataImport( + importer = DataImportXNAT( xnat_configuration = xnat_configuration, num_workers = num_workers ) @@ -48,14 +45,6 @@ def train(config): data = importer.xnat_image_download(raw_data) # Set up mflow experiment - setup_mlflow( - tracking_uri=mlflow.get_tracking_uri(), - experiment_id=mlflow.get_experiment_by_name( - config["project"]["name"] - ).experiment_id - if mlflow.get_experiment_by_name(config["project"]["name"]) - else mlflow.create_experiment(config["project"]["name"]), - ) with mlflow.start_run(nested=True): save_best_model = True @@ -93,6 +82,10 @@ def train(config): label_smoothing = float(config['params']['label_smoothing']), ) + # Metric monitored for checkpointing/early stopping, and its optimisation direction + checkpoint_metric = config['params']['checkpoint_metric'] + 
checkpoint_mode = "min" if checkpoint_metric == "val_loss" else "max" + # Callbacks callbacks = [] callbacks.append(LearningRateMonitor(logging_interval="step")) @@ -104,6 +97,14 @@ def train(config): ) callbacks.append(checkpoint_callback) + early_stopping_callback = EarlyStopping( + monitor=checkpoint_metric, + patience=10, + mode=checkpoint_mode, + verbose=True, + ) + callbacks.append(early_stopping_callback) + # configure trainer trainer = pl.Trainer( precision="32" if cuda_available() else "16", diff --git a/2D_Classifier/train/scripts/tune.py b/project/classifier_2d/mlops/tune.py similarity index 88% rename from 2D_Classifier/train/scripts/tune.py rename to project/classifier_2d/mlops/tune.py index 623e5cc..b8d7ec7 100644 --- a/2D_Classifier/train/scripts/tune.py +++ b/project/classifier_2d/mlops/tune.py @@ -1,22 +1,18 @@ - import configparser import logging import multiprocessing import os import mlflow +import optuna import pytorch_lightning as pl -from ray.air.integrations.mlflow import setup_mlflow -from pytorch_lightning.callbacks import LearningRateMonitor -from pytorch_lightning.callbacks import ModelCheckpoint +from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, EarlyStopping from torch.cuda import is_available as cuda_available -from project.DataModule import DataModule -from project.DataModule import label_dict -from project.Network import Network -from project.XNATDataImport import XNATDataImport +from src.data_import_xnat import DataImportXNAT +from src.datamodule import DataModule, label_dict +from src.network import Network -import optuna logger = logging.getLogger(__name__) # Obtain hyperparameters for this trial @@ -105,6 +101,14 @@ def objective(trial,data,config): ) callbacks.append(checkpoint_callback) + early_stopping_callback = EarlyStopping( + monitor="val_loss", + patience=10, + mode="min", + verbose=True, + ) + callbacks.append(early_stopping_callback) + # configure trainer trainer = pl.Trainer( precision="32" 
if cuda_available() else "16", @@ -147,7 +151,7 @@ def tune(config): else multiprocessing.cpu_count() ) - importer = XNATDataImport( + importer = DataImportXNAT( xnat_configuration = xnat_configuration, num_workers = num_workers ) @@ -158,20 +162,11 @@ def tune(config): # Download images from XNAT data = importer.xnat_image_download(raw_data) - # Set up mflow experiment - setup_mlflow( - tracking_uri=mlflow.get_tracking_uri(), - experiment_id=mlflow.get_experiment_by_name( - config["project"]["name"] - ).experiment_id - if mlflow.get_experiment_by_name(config["project"]["name"]) - else mlflow.create_experiment(config["project"]["name"]), - ) mlflow.pytorch.autolog(log_models=False) # Create optuna study (hyperparameter tuning framework) - study = optuna.create_study(study_name="scaphx-tune", direction="minimize") + study = optuna.create_study(study_name="project-tune", direction="minimize") study.optimize(lambda trial: objective(trial, data, config), n_trials=50) with open(('tune_log.txt'), 'w') as f: diff --git a/2D_Classifier/train/src/__init__.py b/project/classifier_2d/scripts/__init__.py similarity index 100% rename from 2D_Classifier/train/src/__init__.py rename to project/classifier_2d/scripts/__init__.py diff --git a/project/classifier_2d/src/__init__.py b/project/classifier_2d/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/2D_Classifier/train/src/XNATDataImport.py b/project/classifier_2d/src/data_import_xnat.py similarity index 97% rename from 2D_Classifier/train/src/XNATDataImport.py rename to project/classifier_2d/src/data_import_xnat.py index 58ce51c..599d691 100644 --- a/2D_Classifier/train/src/XNATDataImport.py +++ b/project/classifier_2d/src/data_import_xnat.py @@ -2,16 +2,16 @@ from tqdm import tqdm from typing import List -from utils.tools import DataBuilderXNAT +from monai.data import Dataset +from torch.utils.data import DataLoader from xnat.mixin import ImageScanData, SubjectData from src.transforms import load_xnat 
-from monai.data import Dataset -from torch.utils.data import DataLoader +from src.utils.tools import DataBuilderXNAT logger = logging.getLogger(__name__) -class XNATDataImport(): +class DataImportXNAT(): def __init__(self, xnat_configuration: dict = None, num_workers: int = 4, test_batch: int = 0, n_month_data_window=9999, run_type: str='train'): diff --git a/2D_Classifier/train/src/DataModule.py b/project/classifier_2d/src/datamodule.py similarity index 96% rename from 2D_Classifier/train/src/DataModule.py rename to project/classifier_2d/src/datamodule.py index e06a2a8..7d649cc 100644 --- a/2D_Classifier/train/src/DataModule.py +++ b/project/classifier_2d/src/datamodule.py @@ -1,21 +1,19 @@ import logging from collections import Counter from typing import List, Optional + import mlflow import numpy as np import pytorch_lightning import torch - -from monai.data import CacheDataset, Dataset -from monai.data import pad_list_data_collate +from monai.data import CacheDataset, Dataset, pad_list_data_collate from monai.transforms import Compose - from sklearn.model_selection import train_test_split from torch.cuda import is_available from torch.utils.data import DataLoader -from src.transforms import normalise, train_augment, output -from src.transforms.SafeWrapper import SafeWrapperTransform +from src.transforms import normalise, output, train_augment +from src.transforms.safe_wrapper import SafeWrapperTransform logger = logging.getLogger(__name__) diff --git a/2D_Classifier/train/src/Network.py b/project/classifier_2d/src/network.py similarity index 97% rename from 2D_Classifier/train/src/Network.py rename to project/classifier_2d/src/network.py index 5058ba7..a452915 100644 --- a/2D_Classifier/train/src/Network.py +++ b/project/classifier_2d/src/network.py @@ -1,25 +1,22 @@ import logging -import pytorch_lightning +from abc import ABC + import mlflow +import numpy as np +import pytorch_lightning import torch -from abc import ABC from monai.data import decollate_batch 
-from monai.transforms import ( - AsDiscrete, - Compose, - Activations, -) -from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, recall_score +from monai.transforms import Activations, AsDiscrete, Compose +from sklearn.metrics import ConfusionMatrixDisplay, classification_report, confusion_matrix, recall_score from timm import create_model from timm.data import Mixup from torch.nn import CrossEntropyLoss from torchmetrics import Accuracy, F1Score -from src.DataModule import label_dict from torchmetrics.classification import MulticlassAUROC -import numpy as np -logger = logging.getLogger(__name__) +from src.datamodule import label_dict +logger = logging.getLogger(__name__) class Network(pytorch_lightning.LightningModule, ABC): """ diff --git a/2D_Classifier/train/src/transforms/__init__.py b/project/classifier_2d/src/transforms/__init__.py similarity index 64% rename from 2D_Classifier/train/src/transforms/__init__.py rename to project/classifier_2d/src/transforms/__init__.py index 385d338..33b8881 100644 --- a/2D_Classifier/train/src/transforms/__init__.py +++ b/project/classifier_2d/src/transforms/__init__.py @@ -1,29 +1,31 @@ import torch from monai.transforms import ( - LoadImage, - SqueezeDimd, - EnsureChannelFirstd, - CropForegroundd, - Resized, - ScaleIntensityd, CastToTyped, - RandFlipd, - RandZoomd, - RandRotated, + CropForegroundd, + EnsureChannelFirstd, + EnsureTyped, + LoadImage, + NormalizeIntensityd, + RandAdjustContrastd, RandAffined, + RandCoarseDropoutd, + RandFlipd, RandGaussianNoised, RandGaussianSmoothd, + RandRotated, RandScaleIntensityd, - RandAdjustContrastd, - RandCoarseDropoutd, + RandZoomd, ResizeWithPadOrCropd, - ToTensord, + Resized, + ScaleIntensityd, + ScaleIntensityRangePercentilesd, SelectItemsd, - EnsureTyped, Spacingd, + SqueezeDimd, + ToTensord, ) -from src.transforms.LoadImageXNATd import LoadImageXNATd +from src.transforms.load_image_xnatd import LoadImageXNATd def 
load_xnat(xnat_configuration: dict): """ @@ -47,9 +49,8 @@ def normalise(image_size): EnsureChannelFirstd(keys=['image']), CropForegroundd(keys=['image'], source_key='image'), Resized(keys=['image'], size_mode='longest', spatial_size=image_size+20), - #Maybe limit top intensity in case of big spikes? - ScaleIntensityd(keys=["image"], minv=0.0, maxv=255.0), - CastToTyped(keys=["image"], dtype=torch.uint8), + ScaleIntensityRangePercentilesd(keys=["image"], lower=0, upper=99, b_min=0.0, b_max=255.0, clip=True), + CastToTyped(keys=["image"], dtype=torch.float32), ] def train_augment(image_size): @@ -59,20 +60,20 @@ def train_augment(image_size): """ return [ RandFlipd(keys=['image'], spatial_axis=0, prob=0.5), - RandZoomd(keys=['image'], prob=0.2, min_zoom=1.05,max_zoom=1.1), - RandRotated(keys=['image'], prob=0.2, range_x=0.4), - RandAffined(keys=['image'], prob=0.2, padding_mode='zeros'), - RandGaussianNoised(keys=['image'], prob=0.1, mean=0.0, std=0.1), - RandGaussianSmoothd(keys=['image'], prob=0.2, sigma_x=(0.5,1.0)), - RandScaleIntensityd(keys=['image'], prob=0.15, factors=(0.75,1.25)), - RandAdjustContrastd(keys=['image'], prob=0.1, gamma=(0.5,2), retain_stats=True, invert_image=True), - RandAdjustContrastd(keys=['image'], prob=0.3, gamma=(0.5,2), retain_stats=True, invert_image=False), + RandZoomd(keys=['image'], prob=0.4, min_zoom=1.05,max_zoom=1.1), + RandRotated(keys=['image'], prob=0.4, range_x=0.4), + RandAffined(keys=['image'], prob=0.3, padding_mode='zeros'), + RandGaussianNoised(keys=['image'], prob=0.3, mean=0.0, std=10.0), + RandGaussianSmoothd(keys=['image'], prob=0.35, sigma_x=(0.5,1.0), sigma_y=(0.5,1.0)), + RandScaleIntensityd(keys=['image'], prob=0.3, factors=(0.75,1.25)), + RandAdjustContrastd(keys=['image'], prob=0.2, gamma=(0.5,2), retain_stats=True, invert_image=True), + RandAdjustContrastd(keys=['image'], prob=0.4, gamma=(0.5,2), retain_stats=True, invert_image=False), ResizeWithPadOrCropd( keys=["image"], 
spatial_size=(image_size,image_size), mode='replicate' ), - RandCoarseDropoutd(keys=['image'], prob=0.5, fill_value=0, holes=8, max_holes=16, spatial_size=(10,10), max_spatial_size=(36,36)), + RandCoarseDropoutd(keys=['image'], prob=0.35, fill_value=0, holes=8, max_holes=16, spatial_size=(10,10), max_spatial_size=(15,15)), ] def output(image_size): @@ -86,6 +87,9 @@ def output(image_size): mode='replicate' ), ScaleIntensityd(keys=["image"], minv=0.0, maxv=1), + # Normalize with grayscale-averaged ImageNet stats (mean=0.449, std=0.226) + # Required for pretrained ImageNet models + NormalizeIntensityd(keys=["image"], subtrahend=0.449, divisor=0.226), ToTensord(keys=['image', 'label']), SelectItemsd(keys=['subject_id', 'image', 'label']), EnsureTyped(keys=['image', 'label'], track_meta=False), diff --git a/2D_Classifier/train/src/transforms/LoadImageXNATd.py b/project/classifier_2d/src/transforms/load_image_xnatd.py similarity index 96% rename from 2D_Classifier/train/src/transforms/LoadImageXNATd.py rename to project/classifier_2d/src/transforms/load_image_xnatd.py index d91eb39..251cf69 100644 --- a/2D_Classifier/train/src/transforms/LoadImageXNATd.py +++ b/project/classifier_2d/src/transforms/load_image_xnatd.py @@ -1,6 +1,4 @@ -""" -MONAI MapTransform for importing image data from XNAT -""" +"""MONAI MapTransform for importing image data from XNAT.""" import glob import logging import os @@ -9,16 +7,12 @@ import xnat from monai.config import KeysCollection -from monai.transforms import MapTransform, LoadImage -from monai.transforms import Transform +from monai.transforms import LoadImage, MapTransform, Transform logger = logging.getLogger(__name__) - class LoadImageXNATd(MapTransform): - """ - MapTransform for importing image data from XNAT - """ + """MapTransform for importing image data from XNAT.""" def __init__(self, keys: KeysCollection, xnat_configuration: dict = None, image_loader: Transform = LoadImage(), validate_data: bool = False, diff --git 
a/2D_Classifier/train/src/transforms/SafeWrapper.py b/project/classifier_2d/src/transforms/safe_wrapper.py similarity index 95% rename from 2D_Classifier/train/src/transforms/SafeWrapper.py rename to project/classifier_2d/src/transforms/safe_wrapper.py index 0436d64..d3d65a0 100644 --- a/2D_Classifier/train/src/transforms/SafeWrapper.py +++ b/project/classifier_2d/src/transforms/safe_wrapper.py @@ -1,7 +1,6 @@ -# Wrapper for Monai Transforms so failures are logged instead of run ending - -import os +"""Wrapper for Monai Transforms so failures are logged instead of run ending.""" import csv +import os import torch class SafeWrapperTransform: diff --git a/project/classifier_2d/src/utils/__init__.py b/project/classifier_2d/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/2D_Classifier/train/src/utils/tools.py b/project/classifier_2d/src/utils/tools.py similarity index 99% rename from 2D_Classifier/train/src/utils/tools.py rename to project/classifier_2d/src/utils/tools.py index 8df0b4e..5dd988b 100644 --- a/2D_Classifier/train/src/utils/tools.py +++ b/project/classifier_2d/src/utils/tools.py @@ -10,7 +10,6 @@ logger = logging.getLogger(__name__) - class DataBuilderXNAT: def __init__(self, xnat_configuration: dict, actions: list = None, flatten_output=True, test_batch: int = -1, diff --git a/project/shared/__init__.py b/project/shared/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project/tests/__init__.py b/project/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/project/tests/train_tests/__init__.py b/project/tests/train_tests/__init__.py new file mode 100644 index 0000000..e69de29