-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGeneral_Purpose_script
More file actions
359 lines (277 loc) · 19.2 KB
/
General_Purpose_script
File metadata and controls
359 lines (277 loc) · 19.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#### General-Purpose Script: ####
# This script contains the main PPG processing pipeline and the HED model, and is equivalent in structure to figure 4 of the main text.
# Running the HED model is optional; it can be bypassed if only traditional morphological features are desired.
# This script is general purpose, meaning the following steps may be needed for any new data:
# 1. Pre-processing: If not using Bioradio hardware, an alternative preprocessing step may be required.
# 2. Multiple time series: If multiple time series in a dataset are to be analysed, an additional looping structure will be required.
# 3. Setting script parameters: These can be adjusted as required to fit new data; sampling rate and pk_threshld are likely to need changing with new hardware sources.
# Prelims:
# Download and save scripts from the PulseWaveform repository to a single folder
# Set working directory to that folder
# Ensure the below packages (tidyverse etc) are installed
# Set starting parameters (samplingRate etc) as required
# pk_thrshd must be set appropriately for the script to run. If using new data, run the script up to line 78, run plot(vpg, t = "l"), and select an appropriate objective threshold above which all inflection points are peaks
setwd("Desktop/Scripts") # (alter file path as required)
library(tidyverse)
library(splines2)
library(pracma)
library(SplinesUtils)
library(spectral)
library(DescTools)
library(zoo)
library(readr)
source("~/Desktop/Scripts/PPG_processing_functions.R") # (alter file path as required)
source("~/Desktop/Scripts/HED_functions.R") # (alter file path as required)
samplingRate <- 75 # Sampling Rate
pk_thrshd <- 0.2 # Objective threshold for initial identification of peaks in the 1st derivative
plot_aligned_waves <- TRUE # Visualise entire waveform sample (including average) for overview of waveform morphology / variability
plot_osnd <- TRUE # Visualise OSND points of all waveforms in relation to the average wave of the sample
run_hed <- TRUE # Run the HED Model (set to false if only traditional morphological features are desired)
simplex_iterations <- 1000 # Number of simplex iterations for each run of the simplex. Higher iterations increase model accuracy whilst increasing computational time (defailt 20000).
all_beats <- TRUE # Setting to true ensures all available beats in a given time series are modeled
batch_number <- 10 # If all beats = false, determines the number of batches of waves to process
beats_in <- 10 # Number of beats in a batch, over which the width, reflectance timing, and decay rate parameters will be fixed (set to 1 to model each waveform independantly (with all 12 parameters free))
AllOutputs <- list() # For storing all outputs
###################################################################################
# Load in Data #
###################################################################################
data <- read.csv("~/Desktop/PulseAnalysis/Data/Craig/source.csv", header = T) # Read in time series data (alter file path as required)
###################################################################################
# Preprocessing #
###################################################################################
undetrended_data <- data.frame(preproc(dat=data)) # Preprocessing (Bioradio specific): downsampling and undetrending
ppg <- undetrended_data # Express preprocessed data as data.frame (and add time column)
ppg <- data.frame(
time = (0:(nrow(ppg)-1)) / samplingRate,
ppg = ppg[,1]
)
names(ppg)[1] <- "time (s)"
names(ppg)[2] <- "Detrended"
n <- dim(ppg)[1] # Identify peaks in 1st derivative (if pk_thrshd needs adjusting, plot(vpg, t = "l") and determine a
vpg <- ppg[2:n,2] - ppg[1:(n-1),2] # y-axis value above which all inflection points should be peaks, then set pk_thrshd to this value)
beat <- data.frame(ppg[which(vpg[1:(n-1)] < pk_thrshd & vpg[2:n] >= pk_thrshd),1])
rm(vpg)
###################################################################################
# Pipeline: Beat Segmentation #
###################################################################################
undetrended <- ppg[, 2]
sfunction <- splinefun(1:length(undetrended), undetrended, method = "natural")
deriv1 <- sfunction(seq(1, length(undetrended)), deriv = 1)
spline1 <- sfunction(seq(1, length(undetrended)), deriv = 0)
splinePoly <- CubicInterpSplineAsPiecePoly(1:length(undetrended), undetrended, "natural")
deriv1Poly <- CubicInterpSplineAsPiecePoly(1:length(undetrended), deriv1, "natural")
inflexX <- solve(splinePoly, b = 0, deriv = 1)
inflexY <- predict(splinePoly, inflexX)
w <- find_w(d1p = deriv1Poly, deriv1 = deriv1, sp = splinePoly, sr = samplingRate) # Find peaks using custom peak detection algorithm
uv <- find_u_v(wx = w$wX, wy = w$wY, d1 = deriv1, d1p = deriv1Poly,
spline = splinePoly, sr = samplingRate, plot=F)
tmp <- find_o(wx = w$wX, inx = inflexX, iny = inflexY, d1p = deriv1Poly, sp = splinePoly) # Identify O points (see main text)
inflexX <- tmp[[1]]
inflexY <- tmp[[2]]
o_orig <- tmp[[3]]
tmp <- preclean_wuv(w=w, uv=uv, o=o_orig, samp = samplingRate, sp = spline1, q = F)
w <- tmp[[1]]
uv <- tmp[[2]]
o <- tmp[[3]]
rm(tmp)
baseCor <- baseline(inx = inflexX, iny = inflexY, o = o_orig, # Correct baseline
dat = undetrended, sp = splinePoly, plot=F)
sfunctionBC <- splinefun(1:length(baseCor), baseCor, method = "natural")
deriv1BC <- sfunctionBC(seq(1, length(baseCor)), deriv = 1)
spline1BC <- sfunctionBC(seq(1, length(baseCor)), deriv = 0)
splinePolyBC <- CubicInterpSplineAsPiecePoly(1:length(baseCor), baseCor, "natural")
deriv1PolyBC <- CubicInterpSplineAsPiecePoly(1:length(baseCor), deriv1BC, "natural")
w$wY <- predict(splinePolyBC, w$wX)
uv$uY <- predict(splinePolyBC, uv$uX)
uv$vY <- predict(splinePolyBC, uv$vX)
wuv <- cbind(w, uv)
tmp <- clean_wuv(wuv = wuv, sp = splinePolyBC, inx = inflexX, o = o,
samp = samplingRate, bc = baseCor, q = F)
wuv <- tmp[[1]]
ibi <- tmp[[2]] # Before removal of waveforms with poor quality morphology, IBI intervals (which only require peak detection) are stored
oDiff <- tmp[[3]]
rm(tmp, w, uv)
waveLen <- round(median(oDiff)+15)
ppg[, 2] <- baseCor
tmp <- sep_beats(odiff = oDiff, bc = baseCor, samp = samplingRate, wuv = wuv, wvlen = waveLen, # Generate individual beat segments +/- subsetting
ibi=ibi, o=o_orig, inx = inflexX, scale = T, q = F, subset = FALSE, boundaries)
avWave <- tmp[[1]]
pulse <- tmp[[2]]
wuv <- tmp[[3]]
rejects <- tmp[[4]] # Visualise baseline corrected time series with plot(ppg$`time (s)`[1:7500], ppg$Detrended[1:7500], type = "l"), # then points(ppg[inflexX[o], 1], rep(0, length(inflexX[o]))), if desired
rm(tmp)
if(plot_aligned_waves == TRUE){ # Plot aligned waves and average wave
pulse_stacked <- gather(pulse, key = "wave_ID", value = "values", -c("x"))
average <- data.frame(seq((-141/(samplingRate*10)),
((waveLen*15 -9)-142)/(samplingRate*10),
by = 1/(samplingRate*10)))
average <- cbind(average, avWave)
colnames(average)[1] <- "x"
pl <- ggplot(data = pulse_stacked[-which(is.na(pulse_stacked[, 3])), ],
aes(x, values, col = wave_ID), col = "black") +
scale_color_manual(values = rep("black", ncol(pulse))) +
geom_line(size = 1.5, alpha = ((1/length(wuv$wX)*10)-(1/length(wuv$wX)))) +
geom_line(data = average[-which(is.na(average[, 2])), ],
aes(x, avWave), size = 1.125, color = "red") + # y axis boundaries (ylim) will vary based on source data
theme(legend.position = "none") + labs( y= "PPG Output", x = "Time (Seconds)") +
# xlim(-0.1, 0.75) + # + xlim(c(pulse$x[max(which(is.na(avWave)))], pulse$x[length(avWave)])) + ylim(range(avWave[!is.na(avWave)]*1.5))
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"),
axis.title = element_blank(),
axis.text = element_text(size = 24)) # + ylim(-0.6, 1.6)
print(pl)
}
###################################################################################
# Pulse Decomposition Modeling: The HED Model #
###################################################################################
if(run_hed == TRUE){
beat <- ppg[round(inflexX[wuv$o2]), 1] # Preliminary step for running the HED model (creating dataframe for model parameter outputs)
nBeats <- length(beat)
beat <- data.frame(
beat = beat,
dt = (1:nBeats)*0.0
)
beat <- AddOutput(beat)
if(all_beats == T){
batch_number <- floor(nrow(beat)/beats_in)
remainder <- nrow(beat) - (batch_number*beats_in)
}
ppg$Baseline = 1:nrow(ppg) * 0 # Add output columns to ppg
ppg$Excess = 1:nrow(ppg) * 0
ppg$Residue = 1:nrow(ppg) * 0
temp <- FindStartParams(batch_number, beats_in, beat, ppg, gs = model2.GetSegment, # Fill beat and ppg with starting parameters, estimated from the excess
e = model2.Excess, sep = model2.SubtractExcessPeak,
o_points = inflexX[o_orig], wuv = wuv, inflexX = inflexX, all_beats)
beat <- temp[[1]]
ppg <- temp[[2]] # PPG now contains columns for excess and residual elements from FindStartParams
rm(temp)
beat_orig <- beat
fit_check <- list()
for(k in 1:(batch_number+1)){
if(all_beats == TRUE){
if(k == batch_number+1){
if(remainder == 0){break}
beat <- beat_orig[(((k-1)*beats_in) + 1 ):(((k-1)*beats_in) + remainder), ]
beats_in <- remainder
w <- wuv$wX[(((k-1)*beats_in) + 1 ):(((k-1)*beats_in) + remainder)]
}else{
beat <- beat_orig[((k*beats_in)-(beats_in-1)):(k*beats_in), ]
w <- wuv$wX[((k*beats_in)-(beats_in-1)):(k*beats_in)]
}
}else{
beat <- beat_orig[((k*beats_in)-(beats_in-1)):(k*beats_in), ]
w <- wuv$wX[((k*beats_in)-(beats_in-1)):(k*beats_in)]
}
w <- w / samplingRate
renal_param <- median(beat$NTime)
dias_param <- median(beat$DTime)
sys_time <- beat$STime
par <- as.numeric(beat[1,5:16])
beat_start <- beat[, 3]
beat_end <- beat[, 4]
beat_vector <- list(beats_in, beat_start, beat_end)
for(i in 1:4){ # Refine parameters using downhill simplex method (Nelder and Mead, 1965) for four runs
if(i == 1){new_beat <- beat}
within_params <- FindWithinParams(beats_in, ppg, beat = new_beat, gs = model2.GetSegment,
fp = model2.FixParams3, ms = simplex.MakeSimplex3,
m2 = model2.ChiSq3, beat_vector = beat_vector,
renal_param = renal_param, dias_param = dias_param,
sys_time = sys_time, w = w)
across_params <- simplex.MakeSimplex2(data=ppg, param = par, f = model2.ChiSq3,
inScale = 0.1, beat_vector = beat_vector,
beat = new_beat, renal_param = renal_param,
dias_param = dias_param, sys_time = sys_time, w = w)
mat <- make_matrix(across_params, within_params)
sim <- simplex.Run2(data = ppg, simplexParam = mat, f = model2.ChiSq3, optional=NULL,
beat_vector = beat_vector, renal_param = renal_param,
dias_param = dias_param, sys_time = sys_time, ms = simplex_iterations,
w = w, run = c("run", i))
output <- extractOutput(beats_in, sim)
fixed <- FixOutput(beats_in, beat = new_beat, ppg, gs = model2.GetSegment,
fp = model2.FixParams3, across = output[[1]], within = output[[2]],
sys_time = sys_time)
new_beat <- UpdateBeat(beats_in, beat, fixed)
new_beat <- FixBaseline(new_beat, f = model2.ChiSq4, renal_param, dias_param, sys_time, w)
}
fit_check[[k]] <- model2.ChiSq4(data = ppg, params = NULL, beats = beat_vector, # Assess goodness of fit (ChiSq, Max error, NRMSE, and aNRMSE)
beat = new_beat, a = sim[1, ], plot = FALSE,
renal_param = renal_param, dias_param = dias_param,
sys_time = sys_time, w = w)
beat2 <- new_beat # Finalise model outputs for a given batch
colnames(beat2) <- colnames(beat)
beat2 <- beat2[, -c(1:4)]
PlotFits(beats_in, ppg, beat2, gs = model2.GetSegment, rb = model2.Rebuild2) # Plots modelled waves (including component waves) with basic R plot function
# GGplotFits(beats_in, ppg, beat2, gs = model2.GetSegment, rb = model2.Rebuild2, # Plots modelled waves in GGplot form (see main text figures)
# run = 1, pr = 1, p = T)
if(k == 1){beat_final <- beat2}else{beat_final <- rbind(beat_final, beat2)} # Add model outputs from batch to dataframe containing all model outputs
}
}
###################################################################################
# Pipeline continued: Morphological Feature extraction #
###################################################################################
polyWave <- list() # Find fiducial points (OSND)
for(i in 2:ncol(pulse)){
polyWave[[i-1]] <-CubicInterpSplineAsPiecePoly(pulse$x, pulse[, i], "natural")
}
tmp <- diast_pk(avw = avWave, sr = samplingRate, scale = T)
dPeak <- tmp[1]
xShift <- tmp[2]
rm(tmp)
osnd <- osnd_of_average(avWave, dp = dPeak, diff = 0, sr = samplingRate, plot = F)
if(dPeak == 5*samplingRate){
dPeak <- osnd$x[4]*1.2
}
if((osnd$x[4]-osnd$x[3]) < 1.5 & (osnd$x[4]-osnd$x[3]) > 0){
dPeak <- dPeak*0.95
osnd <- osnd_of_average(avWave, dp = dPeak, diff = 0, sr = samplingRate, plot = FALSE)
}
scale <- 1 # Set to 1 if scaling (ideally this should be optional at the beginning, note other functions where scaling is optional would need to be incorporated)
osnd_all <- list()
for(i in 2:ncol(pulse)){
wavi <- pulse[, i][!is.na(pulse[, i])]
if(scale == 1){
xShift2 <- (which(abs(wavi - 0.5) == min(abs(wavi - 0.5))))
}else{
xShift2 <- which.min(abs(wavi))
}
diff <- xShift - xShift2
dpa <- dPeak - diff
osnd_all[[i-1]] <- osnd_of_average(aw = wavi, dp = dpa, diff = diff,
sr = samplingRate, plot = F)
}
if(plot_osnd == TRUE){ # Plot all OSND values against the average
plot(avWave[!is.na(avWave)], type = "l") + for(i in 1:length(osnd_all)){
points(osnd_all[[i]][4, 1], osnd_all[[i]][4, 2], col = "blue")
points(osnd_all[[i]][3, 1], osnd_all[[i]][3, 2], col = "red")
points(osnd_all[[i]][2, 1], osnd_all[[i]][2, 2])
points(osnd_all[[i]][1, 1], osnd_all[[i]][1, 2])
}
}
for(i in 1:length(osnd_all)){ # Extract morphological features
osnd_all[[i]]$y <- osnd_all[[i]]$y - osnd_all[[i]]$y[1]
}
features <- feature_extract(oa = osnd_all, p = pulse, pw = polyWave)
if(run_hed == TRUE){
beat_final <- cbind(beat_orig[1:nrow(beat_final), 1:4], beat_final) # Finalise model outputs (add first four columns)
osnd_fits <- osnd_fit(beat_final, ppg, plot = F) # Calculate model error in recapitulation of fiducial points (OSND)
}
###################################################################################
# Output #
###################################################################################
if(run_hed == FALSE){fit_check <- list(c(1:100), c(1:100), c(1:100))}
temp <- ArrangeOutputs(beat_final, beat_orig, features, pulse, fit_check)
nBeats <- ncol(pulse) - 1
AllOutputs <- list(nBeats, ibi, rejects, pulse, polyWave, osnd_all, avWave, osnd, temp[[2]], temp[[1]], temp[[3]], osnd_fits)
# Key to All Outputs:
# [[1]] == nBeats Number of waveforms in sample
# [[2]] == ibi Inter-beat intervals (note pre-interpolation)
# [[3]] == rejects Rejected beats, arranged according to reason for exclusion (1. excessively long waveforms, 2. excessively short waveforms, 3. bi-peaked waveforms, 4. waveforms that begin to enter a second systolic upstroke, 5. waveforms that drop significantly below baseline, 6. waveforms with significant variability in relation to the average, and 7. waveforms that are significantly different in morphology than the average)
# [[4]] == pulse Individual waveforms in the sample (discrete form)
# [[5]] == polyWave Individual waveforms in the sample (polynomial spline form)
# [[6]] == osnd_all OSND points of individual waves
# [[7]] == avWave Average waveform of the sample
# [[8]] == osnd (of average) OSND points of the average waveform
# [[9]] == features Morphological features derived from OSND points (see supplementary material)
# [[10]] == beat_final Model parameter Outputs
# [[11]] == fit_check Goodness of fit Measures (ChiSq, Max error, NRMSE, aNRMSE)
# [[12]] == osnd_fits Error values (x and y) in recapitulation of OSND points