PulseWaveform/ISO_study_main_script at master · Algebreaker/PulseWaveform · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
#### ISO study main script: ####

# This script contains the main PPG processing pipeline and the HED model.
# The HED model is nested within this script. Running it is optional, it can be bypassed if only traditional morphological features are desired.
# This script is also tailored to the ISO dataset - as such, the data is assumed to consist of a number of participants, each with two isoprenaline and two placebo (saline) time series.
# This script is also wrapped within the tryCatch function - to prevent errors in individual time series from interupting the running of the entire dataset.

# Prelims:
  # Download and save scripts from the PulseWaveform repository to a single folder
  # Set working directory to that folder
  # Ensure the below packages (tidyverse etc) are installed
  # Set starting parameters (samplingRate etc) as required

setwd("Desktop/Scripts")

library(tidyverse)
library(splines2)
library(pracma)
library(SplinesUtils)
library(spectral)
library(DescTools)
library(zoo)

source("~/Desktop/Scripts/PPG_funcs.R")
source("~/Desktop/Scripts/iso_funcs.R")

dir <- "~/Desktop/Khalsa_Fletcher_collaboration copy/Physio Dial/ISO_3.0/Iso_3_PhysioData/"         # Directory to the ISO dataset (replace as required)
run_order <- read.csv("~/Desktop/PulseAnalysis/ISO_3_run_dose_order.txt", sep = "")                 # Directory to document for decoding dose orders
Participants <- GetParticipants(dir)


samplingRate <- 40                  # Sampling Rate
beats_in.m <- 10                    # Number of beats in a batch, over which the width, reflectance timing, and decay rate parameters will be fixed (set to 1 to model each waveform independantly (with all 12 parameters free))
all_beats <- TRUE                   # Setting to true ensures all available beats in a given time series are modeled
batch_number <- 10                  # If all beats = false, determines the number of batches of waves to process
subset <- TRUE                      # Subsets Iso 3 time series using IBI intervals (see supplementary material)
pk_thrshd <- 300                    # Objective threshold for initial identification of peaks in the 1st derivative (likely to require changing with different data sources - see line 96)
run_hed <- TRUE                     # Run the HED Model (set to false if only traditional morphological features are desired)
plot_aligned_waves <- TRUE          # Visualise entire waveform sample (including average) for overview of waveform morphology / variability
plot_osnd <- TRUE                   # Visualise OSND points of all waveforms in relation to the average wave of the sample

errors <- list()                    # For storing any time series that breaks the pipeline / model
AllOutputs <- list()                # For storing morphological and HED model outputs
rejected_waves_list1 <- list()      # For storing rejected waves (iso)
rejected_waves_list3 <- list()      # For storing rejected waves (saline)
waves_carried_forward1 <- list()    # For storing number of waves in a time series post cleaning +/- subsetting (iso)
waves_carried_forward3 <- list()    # For storing number of waves in a time series post cleaning +/- subsetting (saline)
ibis_0mg1 <- list()                 # For storing IBI values
ibis_0mg2 <- list()                 #     "         "
ibis_2mg1 <- list()                 #     "         "
ibis_2mg2 <- list()                 #     "         "


for(run in c(1:length(Participants))){

  errors[[run]] <- tryCatch(

    expr = {

      direc <- GetDirec(run, Participants, dir)                                                              # Find the directory for an individual participant
      pairs <- GetPairs(direc = direc[1], run_order, participant_number = run, subjectID = direc[2])         # Identify the four time series (two saline, two isoprenaline) for a given participant


      temp <- FindUndetrendingParams(direc = direc[1], gs = model2.GetSegment, oa = OffsetAdjust,            # ISO specific pre-processing (see readme)
                                     fa = FactorAdjust, u = UnDetrend, factorCutoff = 0, sr = samplingRate,
                                     pairs = pairs, pk_thrshd = pk_thrshd, plot = F)
      factor_value <- temp[1]
      offset_value <- temp[2]

      Outputs <- list()                                                                                      # For storing individual time series outputs
      for(ps in 1:2){                                                                                        # The four time series are arranged in two pairs, which are run through sequentially

        if(ps == 1){pair <- pairs[[1]]}else{pair <- pairs[[2]]}

        for(pr in 1:2){                                                                                      # Each pair contains one isoprenaline time series (pr = 1), and one placebo (saline) time series (pr = 2), which are run through in order.
          beats_in <- beats_in.m
          new_direc <- paste(direc[1], "/", pair[pr], sep = "")

          if(pr == 2){subset <- "rep"}                                                                       # If on the 2nd (i.e placebo) run of a pair, subsetting boundaries from the preceding isoprenline time series are passed in.
          if(pr == 1){
            subset <- TRUE
            boundaries = NULL
          }

          ppg <- read.csv(new_direc, sep = "")                                                               # Load individual time series
          ppg <- data.frame(
            time = (0:(nrow(ppg)-1)) / samplingRate,
            ppg = ppg[,1]
          )
          names(ppg)[1] <- "time (s)"
          names(ppg)[2] <- "Detrended"


          n <- dim(ppg)[1]                                                                                   # Identify peaks in 1st derivative (if pk_thrshd needs adjusting, plot vpg and determine a
          vpg <- ppg[2:n,2] - ppg[1:(n-1),2]                                                                 # y-axis value above which all inflection points should be peaks, then set pk_thrshd to this value)
          beat <- data.frame(ppg[which(vpg[1:(n-1)] < pk_thrshd & vpg[2:n] >= pk_thrshd),1])
          rm(vpg)

          ppg[,2] = UnDetrend(ppg,factor=factor_value,offset=offset_value)                                   # Apply factor and offset (preprocessing parameters) (Visualise pre-processed time series with plot(ppg[,1],ppg[,2],t='l') if desired)


          undetrended <- ppg[, 2]                                                                            # Run main pipeline
          sfunction <- splinefun(1:length(undetrended), undetrended, method = "natural")
          deriv1 <- sfunction(seq(1, length(undetrended)), deriv = 1)
          spline1 <-  sfunction(seq(1, length(undetrended)), deriv = 0)
          splinePoly <- CubicInterpSplineAsPiecePoly(1:length(undetrended), undetrended, "natural")
          deriv1Poly <- CubicInterpSplineAsPiecePoly(1:length(undetrended), deriv1, "natural")
          inflexX <- solve(splinePoly, b = 0, deriv = 1)
          inflexY <- predict(splinePoly, inflexX)
          w <- find_w(d1p = deriv1Poly, deriv1 = deriv1, sp = splinePoly, sr = samplingRate)                 # Find peaks using custom peak detection algorithm
          uv <- find_u_v(wx = w$wX, wy = w$wY, d1 = deriv1, d1p = deriv1Poly,
                         spline = splinePoly, sr = samplingRate, plot=F)
          tmp <- find_o(wx = w$wX, inx = inflexX, iny = inflexY, d1p = deriv1Poly, sp = splinePoly)          # Identify O points (see main text)
          inflexX <- tmp[[1]]
          inflexY <- tmp[[2]]
          o_orig <- tmp[[3]]
          tmp <- preclean_wuv(w=w, uv=uv, o=o_orig, samp = samplingRate, sp = spline1, q = F)
          w <- tmp[[1]]
          uv <- tmp[[2]]
          o <- tmp[[3]]
          rm(tmp)
          baseCor <- baseline(inx = inflexX, iny = inflexY, o = o_orig,                                      # Correct baseline
                              dat = undetrended, sp = splinePoly, plot=F)
          sfunctionBC <- splinefun(1:length(baseCor), baseCor, method = "natural")
          deriv1BC <- sfunctionBC(seq(1, length(baseCor)), deriv = 1)
          spline1BC <- sfunctionBC(seq(1, length(baseCor)), deriv = 0)
          splinePolyBC <- CubicInterpSplineAsPiecePoly(1:length(baseCor), baseCor, "natural")
          deriv1PolyBC <- CubicInterpSplineAsPiecePoly(1:length(baseCor), deriv1BC, "natural")
          w$wY <- predict(splinePolyBC, w$wX)
          uv$uY <- predict(splinePolyBC, uv$uX)
          uv$vY <- predict(splinePolyBC, uv$vX)
          wuv <- cbind(w, uv)
          tmp <- clean_wuv(wuv = wuv, sp = splinePolyBC, inx = inflexX, o = o,
                           samp = samplingRate, bc = baseCor, q = F)
          wuv <- tmp[[1]]
          ibi <- tmp[[2]]
          oDiff <- tmp[[3]]
          rm(tmp, w, uv)
          waveLen <- round(median(oDiff)+15)
          ppg[, 2] <- baseCor
          tmp <- sep_beats(odiff = oDiff, bc = baseCor, samp = samplingRate, wuv = wuv, wvlen = waveLen,     # Generate individual beat segments +/- subsetting
                           ibi=ibi, o=o_orig, inx = inflexX, scale = T, q = F, subset, boundaries)           # Subsetting constraints (ppg_funcs -> sep_beats) were manually changed in some instances to ensure proper subsetting
          pulse <- tmp[[2]]
          avWave <- tmp[[1]]
          wuv <- tmp[[3]]
          rejects <- tmp[[4]]
          if(subset == T){
            boundaries <- tmp[[5]]
            ibi <- tmp[[6]]                                                                                  # Visualise baseline corrected time series with plot(ppg$`time (s)`[1:7500], ppg$Detrended[1:7500], type = "l"),
            }                                                                                                # then points(ppg[inflexX[o], 1], rep(0, length(inflexX[o]))), if desired
          rm(tmp)


          if(plot_aligned_waves == TRUE){                                                                    # Plot aligned waves and average wave
            pulse_stacked <- gather(pulse, key = "wave_ID", value = "values", -c("x"))
            average <- data.frame(seq((-141/(samplingRate*10)),
                                      ((waveLen*15 -9)-142)/(samplingRate*10), by = 1/(samplingRate*10)))
            average <- cbind(average, avWave)
            colnames(average)[1] <- "x"
            pl <- ggplot(data = pulse_stacked[-which(is.na(pulse_stacked[, 3])), ], aes(x, values, col = wave_ID), col = "black") +
              scale_color_manual(values = rep("black", ncol(pulse))) +
              geom_line(size = 1.5, alpha = ((1/length(wuv$wX)*10)-(1/length(wuv$wX)))) +
              geom_line(data = average[-which(is.na(average[, 2])), ], aes(x, avWave), size = 1.125, color = "red") +                       # y axis boundaries (ylim) will vary based on source data
              theme(legend.position = "none") + labs( y= "PPG Output", x = "Time (Seconds)") +
              # xlim(-0.1, 0.75) +                                                                             # + xlim(c(pulse$x[max(which(is.na(avWave)))], pulse$x[length(avWave)])) + ylim(range(avWave[!is.na(avWave)]*1.5))
              theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
                    panel.background = element_blank(), axis.line = element_line(colour = "black"),
                    axis.title = element_blank(),
                    axis.text = element_text(size = 24)) # + ylim(-0.6, 1.6)
            print(pl)
          }


          if(pr == 1){                                                                                       # Save rejected beats, waves carried forward (after cleaning/subsetting), and IBI values
            if(ps == 1){
              rejected_waves_list1[[run]] <- rejects
              waves_carried_forward1[run] <- boundaries[2]
              ibis_2mg1[[run]] <- ibi
            }else{
              rejected_waves_list3[[run]] <- rejects
              waves_carried_forward3[run] <- boundaries[2]
              ibis_2mg2[[run]] <- ibi
            }
          }else{
            if(ps==1){
              ibis_0mg1[[run]] <- ibi
            }else{
              ibis_0mg2[[run]] <- ibi
            }
          }


          beat <-  ppg[round(inflexX[wuv$o2]), 1]                                                            # Preliminary step for running the HED model (creating dataframe for model parameter outputs)
          nBeats <- length(beat)
          beat <- data.frame(
            beat = beat,
            dt = (1:nBeats)*0.0
          )
          beat <- AddOutput(beat)
          if(all_beats == T){
            batch_number <- floor(nrow(beat)/beats_in)
            remainder <- nrow(beat) - (batch_number*beats_in)
          }


          ppg$Baseline = 1:nrow(ppg) * 0                                                                     # Add output columns to ppg
          ppg$Excess   = 1:nrow(ppg) * 0
          ppg$Residue  = 1:nrow(ppg) * 0

          temp <- FindStartParams(batch_number, beats_in, beat, ppg, gs = model2.GetSegment,                 # Fill beat and ppg with starting parameters, estimated from the excess
                                  e = model2.Excess, sep = model2.SubtractExcessPeak,
                                  o_points = inflexX[o_orig], wuv = wuv, inflexX = inflexX, all_beats)
          beat <- temp[[1]]
          ppg <- temp[[2]]
          rm(temp)

          beat_orig <- beat
          fit_check <- list()

          if(run_hed == TRUE){                                                                               # Run the HED Model (number of waves to be modelled is specified above by all_beats / batch_number)
          for(k in 1:(batch_number+1)){

            if(all_beats == TRUE){
              if(k == batch_number+1){
                if(remainder == 0){break}
                beat <- beat_orig[(((k-1)*beats_in) + 1 ):(((k-1)*beats_in) + remainder), ]
                beats_in <- remainder
                w <- wuv$wX[(((k-1)*beats_in) + 1 ):(((k-1)*beats_in) + remainder)]
              }else{
                beat <- beat_orig[((k*beats_in)-(beats_in-1)):(k*beats_in), ]
                w <- wuv$wX[((k*beats_in)-(beats_in-1)):(k*beats_in)]
              }
            }else{
              beat <- beat_orig[((k*beats_in)-(beats_in-1)):(k*beats_in), ]
              w <- wuv$wX[((k*beats_in)-(beats_in-1)):(k*beats_in)]
            }

            w <- w / samplingRate
            renal_param <- median(beat$NTime)
            dias_param <- median(beat$DTime)
            sys_time <- beat$STime
            par <- as.numeric(beat[1,5:16])
            beat_start <- beat[, 3]
            beat_end <- beat[, 4]
            beat_vector <- list(beats_in, beat_start, beat_end)

            for(i in 1:4){                                                                                   # Refine parameters using downhill simplex method (Nelder and Mead, 1965) for four runs
              if(i == 1){new_beat <- beat}
              within_params <- FindWithinParams(beats_in, ppg, beat = new_beat, gs = model2.GetSegment,
                                                fp = model2.FixParams3, ms = simplex.MakeSimplex3,
                                                m2 = model2.ChiSq3, beat_vector = beat_vector,
                                                renal_param = renal_param, dias_param = dias_param,
                                                sys_time = sys_time, w = w)
              across_params <- simplex.MakeSimplex2(data=ppg, param = par, f = model2.ChiSq3,
                                                    inScale = 0.1, beat_vector = beat_vector,
                                                    beat = new_beat, renal_param = renal_param,
                                                    dias_param = dias_param, sys_time = sys_time, w = w)
              mat <- make_matrix(across_params, within_params)
              sim <- simplex.Run2(data = ppg, simplexParam = mat, f = model2.ChiSq3, optional=NULL,
                                  beat_vector = beat_vector, renal_param = renal_param,
                                  dias_param = dias_param, sys_time = sys_time, w = w, run = c("run", i))
              output <- extractOutput(beats_in, sim)
              fixed <- FixOutput(beats_in, beat = new_beat, ppg, gs = model2.GetSegment,
                                 fp = model2.FixParams3, across = output[[1]], within = output[[2]],
                                 sys_time = sys_time)
              new_beat <- UpdateBeat(beats_in, beat, fixed)
              new_beat <- FixBaseline(new_beat, f = model2.ChiSq4, renal_param, dias_param, sys_time, w)
            }


            fit_check[[k]] <- model2.ChiSq4(data = ppg, params = NULL, beats = beat_vector,                  # Assess goodness of fit (ChiSq, Max error, NRMSE, and aNRMSE)
                                            beat = new_beat, a = sim[1, ], plot = FALSE,
                                            renal_param = renal_param, dias_param = dias_param,
                                            sys_time = sys_time, w = w)

            beat2 <- new_beat                                                                                # Finalise model outputs for a given batch
            colnames(beat2) <- colnames(beat)
            beat2 <- beat2[, -c(1:4)]


            PlotFits(beats_in, ppg, beat2, gs = model2.GetSegment, rb = model2.Rebuild2)                     # Plots modelled waves (including component waves) with basic R plot function

            # GGplotFits(beats_in, ppg, beat2, gs = model2.GetSegment, rb = model2.Rebuild2,                 # Plots modelled waves in GGplot form (see main text figures)
            # run, pr, p = FALSE)

            if(k == 1){beat_final <- beat2}else{beat_final <- rbind(beat_final, beat2)}                      # Add model outputs from batch to dataframe containing all model outputs

          }
          }


          polyWave <- list()                                                                                 # Complete main pipeline to find fiducial points (OSND)
          for(i in 2:ncol(pulse)){
            polyWave[[i-1]] <-CubicInterpSplineAsPiecePoly(pulse$x, pulse[, i], "natural")
          }
          tmp <- diast_pk(avw = avWave, sr = samplingRate, scale = T)
          dPeak <- tmp[1]
          xShift <- tmp[2]
          rm(tmp)
          osnd <- osnd_of_average(avWave, dp = dPeak, diff = 0, sr = samplingRate, plot = F)
          if(dPeak == 5*samplingRate){
            dPeak <- osnd$x[4]*1.2
          }
          if((osnd$x[4]-osnd$x[3]) < 1.5 & (osnd$x[4]-osnd$x[3]) > 0){
            dPeak <- dPeak*0.95
            osnd <- osnd_of_average(avWave, dp = dPeak, diff = 0, sr = samplingRate, plot = FALSE)
          }
          scale <- 1                                                                                         # Set to 1 if scaling (ideally this should be optional at the beginning, note other functions where scaling is optional would need to be incorporated)
          osnd_all <- list()
          for(i in 2:ncol(pulse)){
            wavi <- pulse[, i][!is.na(pulse[, i])]
            if(scale == 1){
              xShift2 <- (which(abs(wavi - 0.5) == min(abs(wavi - 0.5))))
            }else{
              xShift2 <- which.min(abs(wavi))
            }
            diff <- xShift - xShift2
            dpa <- dPeak - diff
            osnd_all[[i-1]] <- osnd_of_average(aw = wavi, dp = dpa, diff = diff,
                                               sr = samplingRate, plot = F)
          }


          if(plot_osnd == TRUE){                                                                             # Plot all OSND values against the average
            plot(avWave[!is.na(avWave)], type = "l") + for(i in 1:length(osnd_all)){
              points(osnd_all[[i]][4, 1], osnd_all[[i]][4, 2], col = "blue")
              points(osnd_all[[i]][3, 1], osnd_all[[i]][3, 2], col = "red")
              points(osnd_all[[i]][2, 1], osnd_all[[i]][2, 2])
              points(osnd_all[[i]][1, 1], osnd_all[[i]][1, 2])
            }
          }


          for(i in 1:length(osnd_all)){                                                                      # Extract morphological features
            osnd_all[[i]]$y <- osnd_all[[i]]$y - osnd_all[[i]]$y[1]
          }
          features <- feature_extract(oa = osnd_all, p = pulse, pw = polyWave)


          beat_final <- cbind(beat_orig[1:nrow(beat_final), 1:4], beat_final)                                # Finalise model outputs (add first four columns)

          if(run_hed == TRUE){
            osnd_fits <- osnd_fit(beat_final, ppg, plot = F)                                                 # Calculate model error in recapitulation of fiducial points (OSND)
          }


           if(ps == 1){                                                                                       # All measures from time series 1-4 are outputted in turn for each participant
            if(pr == 1){otpt <- 1}else{otpt <- 2}
          }else{
            if(pr == 1){otpt <- 3}else{otpt <- 4}
          }
          if(run_hed == FALSE){
            fit_check <- list(c(1:100), c(1:100), c(1:100))
          }
          temp <- ArrangeOutputs(beat_final, beat_orig, features, pulse, fit_check, ps, pr)
          Outputs[[otpt]] <- list(temp[[1]], temp[[2]], temp[[3]], osnd_fits, osnd_all, avWave, osnd, pulse, polyWave)

        }
      }

      AllOutputs[[run]] <- Outputs                                                                           # Each participant output list is aggregated into a parent list

    },
    error = function(e){
      message('Caught an error!')
      message(e)
      return(e)
    },
    warning = function(w){
      message('Caught a warning!')
      message(w)
      return(w)
    },
    finally = {
      message('All done with current participant.')
    }

  )

}


PlotRejects(rejected_waves_list1, rejected_waves_list3)                                                      # Plot number of waves post cleaning and rejected waves
PlotWavesCarriedForward(waves_carried_forward1, waves_carried_forward3)

non_null_names <- which(!sapply(errors, is.null))                                                            # Check for any errors (participants who failed to run)
errors <- errors[non_null_names]
names(errors) <- non_null_names
print(errors)

save(AllOutputs, file = "~/Desktop/AllOutputs.RData")                                                        # Save main outputs (morphological features + HED model outputs)


save(rejected_waves_list1, file = "~/Desktop/rejected_waves_list1.RData")                                    # Save additional outputs
save(rejected_waves_list3, file = "~/Desktop/rejected_waves_list3.RData")

save(waves_carried_forward1, file = "~/Desktop/waves_carried_forward1.RData")
save(waves_carried_forward3, file = "~/Desktop/waves_carried_forward3.RData")

save(ibis_0mg1, file = "~/Desktop/ibis_0mg1.RData")
save(ibis_0mg2, file = "~/Desktop/ibis_0mg2.RData")
save(ibis_2mg1, file = "~/Desktop/ibis_2mg1.RData")
save(ibis_2mg2, file = "~/Desktop/ibis_2mg2.RData")


# Refer to analysis script for further analysis of these outputs