PulseWaveform/PPG_processing_functions at master · Algebreaker/PulseWaveform · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# PPG function list:

# 1. Preproc  (note for bioradio data only)
# 2. find_w
# 3. find_u_v
# 4. find_o
# 5. preclean_wuv
# 6. Baseline
# 7. Clean_wuv
# 8. sep_beats
# 9. find_average
# 10. find_sd
# 11. diast_pk
# 12. osnd_of_average
# 13. feature_extract


preproc <- function(data){
  dat<-data[!(data$PPG.PulseOx1=='NaN'),]

  #Downsample
  # The BioRadio device provides 250 samples per second, but the PPG is only sampled 75 times per second,
  # so we typically have repeated values in a pattern of 3-3-4 repeating. DownSample tries to retrieve the
  # unique values, with an effort to be robust against variation in the repeat pattern and also against
  # genuine repeated values.

  list<-rle(dat$PPG.PulseOx1)
  ID <- rep(1:length(list$values), times = list$lengths)
  data_downsampled <- c()

  nSrc <- nrow(dat)
  iDst <- 1
  iSrc <- 1
  iVal <- 1
  print("Deduplicating data...")
  if(TRUE){ # Mode switch in case new method is worse or somehow broken
    startClock <- Sys.time()
    reportClock <- 10

    data_downsampled <- cbind(dat, ID)

    while (iSrc <= nSrc){
      if (as.double(difftime(Sys.time(),startClock,unit="secs"))>reportClock){
        min <- as.integer(reportClock/60)
        sec <- reportClock %% 60
        time <- paste("[",min,":",sep="")
        if (sec == 0){ time <- paste(time,"00]",sep="")} else { time <- paste(time,sec,"]",sep="")}
        print(paste(time,iSrc,"of",nSrc))
        reportClock <- reportClock + 10
      }

      data_downsampled[iDst,] <- data_downsampled[iSrc,]
      iDst <- iDst + 1
      if (list$lengths[iVal] <= 4){
        iSrc <- iSrc + list$lengths[iVal]
        iVal <- iVal + 1
      } else {
        iSrc <- iSrc + 4
        list$lengths[iVal] <- list$lengths[iVal] - 4
      }
    }

    # Trim downsampled data to size
    data_downsampled <- data_downsampled[1:(iDst-1),]

  }else{ # Old method, using rbind which gets slow

    data2 <- cbind(dat, ID)
    data_downsampled <-c()

    for (i in 1:max(ID)){
      sub.data <- dplyr::filter(data2, ID == i)
      if(nrow(sub.data) <= 4){
        data_downsampled <- rbind(data_downsampled, sub.data[1,])
      }else if(nrow(sub.data) > 4 ){data_downsampled <- rbind(data_downsampled, sub.data[1,], sub.data[5,])}
    }

  } # End of method selector

  print("Removing DC blocker...")
  #Undetrend
  # Analysis of device output indicates that the PPG signal is detrended by application of the following
  # formula: OUT[i] = 80 + (OUT[i-1]-80) * 0.96875 + (IN[i] - [IN[i-1]), where the constant 0.96875 is
  # an approximation fitted to the data.
  # Individual pulse events are more comprehensible if the detrending is not used, so this function
  #removes it by inverting the above function.
  undetrended <-replicate(length(data_downsampled$PPG.PulseOx1)-1, 0)
  undetrended<-c(data_downsampled$PPG.PulseOx1[1],undetrended) #add first detrended value to vector
  for (i in 2:length(data_downsampled$PPG.PulseOx1)){
    undetrended[i]<-((data_downsampled$PPG.PulseOx1[i]-80) - ((data_downsampled$PPG.PulseOx1[i-1]-80) * 0.96875) + (undetrended[i-1]))
  }
  print("Done")
  return(undetrended)
}


find_w <- function(d1p, deriv1, sp, sr){
  ########################################################################################################################################
  # FindW identifies peaks in the first derivative of the PPG time series (denoted w on the original PPG pulse wave (Elgendi et al, 2018)).
  # A rolling window relative to heart rate is applied to identify beats and artefacts.
  # Peaks identified within a window are confirmed as genuine / artefactual with a series of checks against thresholds derived from beats
  # local to the window and across the entire time series.

  # Inputs:
  # d1p (polynomial spline of first derivative of the PPG time series)
  # deriv1 (first derivative in discrete form)
  # sp (polynomial spline of original time series)
  # sr (sample rate)

  # Outputs:
  # wX (x axis coordinates of all w points)
  # wY (y axis coordinates of all w points on original PPG trace)
  # wYD1 (y axis coordinates of all w points on 1st derivative trace)
  ########################################################################################################################################

  d1InflxX <- solve(d1p, b = 0, deriv = 1)                 # Find inflection points on 1st deriv
  d1InflxY <- predict(d1p, d1InflxX)
  wX <- c()                                                # Create vectors for w values to be stored
  wY <- c()
  window <- list()                                         # Define window within which to identify peaks
  prevPkDist <- c()                                        # Create vector to store peak to peak distances

  ############# Identify first two peaks: #############

  a <- 2                                                                                                                                        # Start with a very small window (spanning three (1 + a) inflection points),
  while(length(wX) < 2){                                                                                                                        # and widen until two peaks are identified within the window

    firstWindow <- data.frame(d1InflxX[1]:d1InflxX[1+a], deriv1[d1InflxX[1]:d1InflxX[1+a]])                                                     # Create window
    windowInflxY <- d1InflxY[which(d1InflxX > firstWindow[1, 1] & d1InflxX < firstWindow[, 1][length(firstWindow[, 1])])]                       # Find the inflection points which fall within the window
    windowInflxX <- d1InflxX[which(d1InflxX > firstWindow[1, 1]  & d1InflxX < firstWindow[, 1][length(firstWindow[, 1])])]
    threshold <- quantile(firstWindow[, 2], probs=c(.95))                                                                                       # Define a threshold for finding peaks, of 0.95
    windowPks <- which(windowInflxY > threshold)                                                                                                # Identify inflection points above the threshold as peaks

    if(length(windowPks) == 2 & windowInflxX[windowPks[2]] - windowInflxX[windowPks[1]] > sr){                                                                                      # If the first two peaks indicate a heart rate < 60 bpm,
      confirmWindow <- data.frame(d1InflxX[windowPks[1]+2]:d1InflxX[windowPks[2]-2],   deriv1[d1InflxX[windowPks[1]+2]:d1InflxX[windowPks[2]-2]])       # ensure a peak between them has not been missed with the current threshold
      inflxConformY <- d1InflxY[which(d1InflxX > confirmWindow[1, 1] & d1InflxX < confirmWindow[, 1][length(confirmWindow[, 1])])]
      inflxConformX <- d1InflxX[which(d1InflxX > confirmWindow[1, 1]  & d1InflxX < confirmWindow[, 1][length(confirmWindow[, 1])])]                     # Find inflection points within the window (defined as between peaks 1 and 2)
      threshold <- max(quantile(confirmWindow[, 2], probs=c(.95)), windowInflxY[windowPks[1]]/2)
      missedPks <- inflxConformX[which(inflxConformY > threshold)]                                                                                      # Identify peaks above threshold (and greater than 50% of the height of the first peak)
      missedPks <- which(windowInflxX == missedPks[1])                                                                                                  # Identify the inflection point in the original window which corresponds to the missed peak
      if(length(missedPks) > 0){windowPks[2] <- missedPks[1]}                                                                                                                       # Assign the missed peak as the second peak
    }

    if(length(windowPks) == 2 & windowInflxX[windowPks[2]] - windowInflxX[windowPks[1]] < (sr/10)){                                                                         # Rarely, the first peak contains two inflection points above threshold,
      windowPks <- windowPks[-2]                                                                                                                # if peaks are too close (< 1/10th of a second), the second peak is discounted
    }

    windowPksY <- windowInflxY[windowPks]                                                                                                       # Identify the inflection points corresponding to peaks
    windowPks <- windowInflxX[windowPks]

    if(length(windowPks) > 2 & (windowPks[3] - windowPks[1] < 1)){                                                                              # Very rarely, the first peak contains three inflection points above threshold. Discount two.
      windowPks <- windowPks[-c(1, 3)]
    }

    m <- mean(d1InflxY[1:(1+a)])                                                                                                                # Calculate mean and SD of inflection points within the window
    std <- sd(d1InflxY[1:(1+a)])

    if(length(windowPks) == 2){                                                                                                                 # If two peaks are found, confirm that they are both significantly higher than surrounding
      if(windowPksY[1] > (m + (1.5*std)) & windowPksY[1] > (median(deriv1[1:100])+std(deriv1[1:100]))){                                         # inflection points, and that the second peak is greater than half the height of the first
        wX[1] <-  windowPks[1]                                                                                                                  # peak (unless the first peak is an artefact)
        if(windowPksY[2] > (m + (1.5*std)) & windowPksY[2] > (windowPksY[1]/2) | windowPksY[1] > (mean(deriv1) + (5*std(deriv1)))){
          wX[2] <-  windowPks[2]
        }
      }
    }

    if(length(windowPks) > 3){                                                                                                                  # If more than 3 peaks identified, assume the time series begins with an artefact and skip
      d1InflxX <- d1InflxX[-c(1:(sr*(100/75)))]                                                                                                 # forward, resetting a.
      d1InflxY <- d1InflxY[-c(1:(sr*(100/75)))]
      a <- -3
      print("skipped")
    }

    a <- a + 5                                                                                                                                  # Increases until two legitimate peaks are found
  }

  ############# Find remaining peaks (with dynamic moving window dependent on peak to peak distance): #############

  artefacts <- c()                                                                                                                              # Create vector to store artefactual peaks
  m <- mean(d1InflxY)                                                                                                                           # Calculate mean and SD of inflection points across the time series
  std <- sd(d1InflxY)
  for(i in 3:length(d1InflxX)){
    prevPkDist[i] <- wX[length(wX)] - wX[length(wX)-1]                                                                                          # Peak to peak distance defined as distance between two previous peaks
    if(prevPkDist[i] > (mean(prevPkDist[!is.na(prevPkDist)])*2)){                                                                               # Identify abnormally large peak to peak distances (this can be because
      prevPkDist[i] <- prevPkDist[3]                                                                                                            # there was a large gap between an artefact and the next legitimate peak)
    }                                                                                                                                           # and correct them by resetting the value (this avoids leapfrogging)
    if((wX[length(wX)] + (4*prevPkDist[i]) > length(deriv1))){
      break                                                                                                                                     # If the next window goes beyond the length of the data, break the loop
    }
    windowPks <- c()
    windowExtnd <- 1.35                                                                                                                         # Define parameters by which to extend the window or move it forward
    windowStart <- 0.5
    printed <- NA
    if(length(artefacts) > 0){                                                                                                                  # Recalculate mean and standard deviation after removing any artefacts identified
      remove <- c()
      for(j in artefacts[which(artefacts < length(wX))]){
        remove[j] <- which(abs(d1InflxX - wX[j]) == min(abs(d1InflxX - wX[j])))
      }
      remove <- remove[!is.na(remove)]
      newRem <- c()
      for(j in 1:length(remove)){
        newRem[(length(newRem)+1):(length(newRem)+21)] <- (remove[j] -10): (remove[j] + 10)                                                     # Remove inflection points around artefact peaks
      }
      if(sum(newRem < 1) > 0){
        newRem <- newRem[-(which(newRem < 1))]
      }
      m <- mean(d1InflxY[-newRem])
      std <- sd(d1InflxY[-newRem])
    }

    while(length(windowPks) < 1){                                                                                                               # Each subsequent window will adjust (if required) until the next peak is detected

      if(windowExtnd > 10){
        windowStart <- 2
        windowExtnd <- 2.5
      }

      window[[i]] <- data.frame((wX[length(wX)] + windowStart*prevPkDist[i]):(wX[length(wX)] + windowExtnd*prevPkDist[i]),                      # Define the window initially as from (the previous peak + (peak to peak distance / 2))
                     deriv1[(wX[length(wX)] + windowStart*prevPkDist[i]):(wX[length(wX)] + windowExtnd*prevPkDist[i])])                         # to (previous peak + (peak to peak distance * 1.35))
      windowInflxY <- d1InflxY[which(d1InflxX > window[[i]][1, 1] & d1InflxX < window[[i]][length(window[[i]][, 1]), 1])]
      windowInflxX <- d1InflxX[which(d1InflxX > window[[i]][1, 1] & d1InflxX < window[[i]][length(window[[i]][, 1]), 1])]                       # Identify which inflection points are within the window
      threshold <- quantile(window[[i]][, 2], probs=c(0.95))                                                                                    # Define threshold
      windowPks <- which(windowInflxY > threshold)                                                                                              # Identify peaks
      windowPksY <- windowInflxY[windowPks]
      windowPks <- windowInflxX[windowPks]

      # plot(window[[i]], type = "l")                                                                                                           # For debugging purposes, the window can be plotted
      # points(windowPks, windowPksY, pch = 19)

      ############# Window Peak Confirmation Criteria: #############

      if(length(windowPks) > 2){                                                                                                                # If more than 2 peaks are identified within the window, this can be due to four reasons:
        if(max(windowPksY) > (m+(2*std)) |  max(windowPksY) > (mean(deriv1[windowPks[1]:(windowPks[1]                                           # 1. a window does not include a genuine peak, and there are multiple secondary ones of similar height
                           + (3*prevPkDist[i]))]) + 2*std(deriv1[windowPks[1]:(windowPks[1] + (3*prevPkDist[i]))]))){                           # 2. there is an artefact with more than two peaks
                                   wX[length(wX)+1] <- windowPks[which(windowPksY == max(windowPksY))]                                          # 3. there are significantly large secondary peaks that also exceed the threshold for identification
                                   lowPks <- windowPksY[order(windowPksY)[1:2]]                                                                 # 4. a genuine peak has multiple inflection points
          if(lowPks[1] > m+(2*std) & lowPks[2] > m+(2*std)
             & (windowPks[3] - windowPks[1]) > (prevPkDist[i]/10)){                                                                             # Check if the maximum peak is above a threshold relative to the time series (no 1)
               cat('\n','Potential artefact',  ', plot(', (wX[i-1]-100), ':', (wX[i-1]+300), ', deriv1[', (wX[i-1]-100), ':', (wX[i-1]+300),    # If it is, check if both lower peaks also exceed the threshold,
               '], type = "l") ,', 'wave', i, '+/- 2 removed because the non-max peaks were high')                                              # if so mark them as artefactual (no 2), if not assume they are secondary peaks (no 3)
               artefacts[length(artefacts) + c(1, 2, 3, 4, 5)] <- c(i-2, i-1, i, i+1, i+2)                                                      # An additional condition of the above is that the 'peaks' are not too close together so as to be inflection points (no 4)
          }
        }else{
          windowExtnd <- windowExtnd + 0.5                                                                                                      # If no 1 is the case, extend the window to look for peaks again
          windowPks <- c()
        }
      }

      if(length(windowPks) == 2){                                                                                                               # If two peaks are identified, they should be confirmed as genuine:
        if(max(windowPksY) > (m+(2*std)) | max(windowPksY) > (mean(deriv1[windowPks[1]:(windowPks[1]                                            # Check if the maximum peak exceeds the global or local threshold
           + (3*prevPkDist[i]))]) + 2*std(deriv1[windowPks[1]:(windowPks[1] + (3*prevPkDist[i]))]))){                                           # (local defined relative to peak to peak distance)
          if((windowPks[2] - windowPks[1]) < (prevPkDist[i]/3)){                                                                                # If it does, check if the two peaks are close togetber in time
            wX[length(wX)+1] <- windowPks[which(windowPksY == max(windowPksY))]                                                                 # (within 1/3rd of the peak to peak distance)
            cat('\n','Potential artefact',  ', plot(', (wX[i-1]-100), ':', (wX[i-1]+300),                                                       # If they are, mark the highest peak as artefactual
                ', deriv1[', (wX[i-1]-100), ':', (wX[i-1]+300), '], type = "l") ,', 'wave',
                i, '+/- 2 removed because two peaks found too close together')
          }else{                                                                                                                                # If they are not, go through both peaks in turn to confirm if genuine:
            if((windowPksY[1] > (m+(2*std))) | windowPksY[1] > (mean(deriv1[windowPks[1]:(windowPks[1]                                          # Check if the first peak can be marked as genuine by:
              + (3*prevPkDist[i]))]) + 2*std(deriv1[windowPks[1]:(windowPks[1] + (3*prevPkDist[i]))])) &                                        # 1. comparing to time series threshold
              windowPksY[1] > (windowPksY[2]/2)){                                                                                               # 2. comparing to local threshold
                wX[length(wX)+1] <- windowPks[1]                                                                                                # 3. comparing to height of the other peak (should exceed it's half maximum)
                if(windowPksY[2] > (m+(2*std)) | windowPksY[2] > (mean(deriv1[windowPks[1]:(windowPks[1]                                        # If it can, the second peak can be marked as genuine with the same criteria
                + (3*prevPkDist[i]))]) + 2*std(deriv1[windowPks[1]:(windowPks[1] + (3*prevPkDist[i]))])) &
                windowPksY[2] > (windowPksY[1]/2)){
                  wX[length(wX)+1] <- windowPks[2]
              }
            }else{                                                                                                                              # If the first peak is not genuine, check the second and identify only the
              if(windowPksY[2] > (m+(2*std)) | windowPksY[2] > (mean(deriv1[windowPks[1]:(windowPks[1] +                                        # second peak as genuine if appropriate.
                (3*prevPkDist[i]))]) + 2*std(deriv1[windowPks[1]:(windowPks[1] + (3*prevPkDist[i]))])) &
                windowPksY[2] > (windowPksY[1]/2)){
                wX[length(wX)+1] <- windowPks[2]
              }
            }
          }
        }else{                                                                                                                                  # If the maximum peak did not exceed threshold, extend the window and look again.
          windowExtnd <- windowExtnd + 0.5
          windowPks <- c()
        }
      }

      if(length(windowPks) == 1){                                                                                                               # If one peak is identified, confirm it is genuine by:
        if(windowPksY > (m+(2*std)) | windowPksY > (mean(deriv1[windowPks[1]:(windowPks[1] +                                                    # 1. comparing to time series threshold
          (3*prevPkDist[i]))]) + 2*std(deriv1[windowPks[1]:(windowPks[1] + (3*prevPkDist[i]))])) |                                              # 2. comparing to local threshold
          windowPksY > (predict(d1p, wX[i-1])*0.9)){                                                                                            # 3. comparing it to the height of the previous peak
            wX[length(wX)+1] <- windowPks
        }else{
          if((i-1) %in% artefacts){                                                                                                             # If the above criteria are not met, check if the previous peak was artefactual
            wX[length(wX)+1] <- windowPks
          }else{                                                                                                                                # If it was not, extend the window
            windowExtnd <- windowExtnd + 0.5
            windowPks <- c()
          }
        }
      }

      #############Window Artefact Identification: #############

      if(windowExtnd > 2 & is.na(printed)){                                                                                                     # If the window has been extended more than twice to find a peak,
        cat('\n','Potential artefact',  ', plot(', (wX[i-1]-100), ':', (wX[i-1]+300), ', deriv1[',                                              # it is likely that that peak may be an artefact; label it as such
           (wX[i-1]-100), ':', (wX[i-1]+300), '], type = "l") ,', 'wave', i, '+/- 2 removed because window needed extending')
        printed <- 1
        artefacts[length(artefacts) + c(1, 2, 3, 4, 5)] <- c(i-2, i-1, i, i+1, i+2)                                                             # i-1 and subsequent adjustments are used because wX[i] does not yet exist
      }

      if(min(window[[i]]) < (m-(3*std)) & is.na(printed)){                                                                                      # If a window contains a value that drops considerably below the mean,
        cat('\n','Potential artefact',  ', plot(', (wX[i-1]-100), ':', (wX[i-1]+300), ', deriv1[',                                              # label the peak found in that window as an artefact
           (wX[i-1]-100), ':', (wX[i-1]+300), '], type = "l") ,', 'wave', i, '+/- 2 removed because the window contains a very low value')
        artefacts[length(artefacts) + c(1, 2, 3, 4, 5)] <- c(i-2, i-1, i, i+1, i+2)
      }

      windowExtnd <- windowExtnd + 0.1                                                                                                          # If no peaks are found in a window (or only spurious ones found),
    }                                                                                                                                           # extend the window and look again
  }

  ############# Post Peak Identification Cleaning: #############

  d1wY <- predict(d1p, wX)                                                                                                                      # Find summary statistics of identified peak y-values
  m <- mean(d1wY)
  std <- sd(d1wY)
  if(length(artefacts) > 0){                                                                                                                    # Remove identified artefacts (should this be before calculating mean??)
    wX <- wX[-artefacts]
    d1wY <- d1wY[-artefacts]
  }

  artefacts <- c()                                                                                                                              # Check for unusually tall peaks, and label them as artefacts if:
  for(i in 2:(length(wX)-1)){
    if(d1wY[i] > 1.5*d1wY[i+1] & d1wY[i] > 1.5*d1wY[i-1] | d1wY[i] > (m+(5*std))){                                                              # 1. the ith wave height exceeds local (comparison with i-1 and i+1) or time series thresholds
      cat('\n','Potential artefact',  ', plot(', (wX[i-1]-100), ':', (wX[i-1]+300), ', deriv1[',
          (wX[i-1]-100), ':', (wX[i-1]+300), '], type = "l") ,', 'wave', i, '+/- 2 removed because of a very tall peak')
      artefacts[length(artefacts) + c(1, 2, 3, 4, 5)] <- c(i-2, i-1, i, i+1, i+2)
    }
    if(i > 5 & i < (length(wX)-5)){
      if(d1wY[i] > (1.5*mean(d1wY[c((i-5):(i+5))]))){
        cat('\n','Potential artefact',  ', plot(', (wX[i-1]-100), ':', (wX[i-1]+300), ', deriv1[',                                              # 2. the ith wave height exceeds another local threshold (derived from i-5 to i+5)
          (wX[i-1]-100), ':', (wX[i-1]+300), '], type = "l") ,', 'wave', i, '+/- 2 removed because of a very tall peak')
        artefacts[length(artefacts) + c(1, 2, 3, 4, 5)] <- c(i-2, i-1, i, i+1, i+2)
      }
    }
  }

  ############# Output: #############

  wY <- predict(sp, wX)                                    # Find W y_values on original trace
  Ws <- data.frame(wX, wY, d1wY)                           # Create output dataframe
  if(length(artefacts) > 0){                               # Remove any further artefacts identified
    Ws <- Ws[-artefacts, ]
  }
  Ws <- Ws[-1, ]                                           # Remove the first and last waves
  Ws <-Ws[-nrow(Ws), ]

  colnames(Ws) <- c("wX", "wY", "wYD1")                    # Outputs as described above
  return(Ws)
}


find_u_v <- function(wx, wy, d1, d1p, spline, sr = samplingRate, plot = FALSE){
  ########################################################################################################################################
  # FindUV identifies the points on the systolic upstroke that correspond to the two half maximum values on the corresponding first
  # derivative peak. The first, before w, is denoted U. The second, after w, is denoted V.

  # Inputs:
  # wx (A vector of x-coordinates corresponding to w points on the PPG time series)
  # wy (A vector of y-coordinates corresponding to w points on the PPG time series)
  # d1 (The first derivative time series, discrete form)
  # d1p (The first derivative time series, polynomial spline form)
  # spline (The original PPG time series, polynomial spline form)
  # sr (samplingRate)
  # plot (Logical, will plot the first derivative time series with U and V values as points if set to true)

  # Outputs:
  # uX (A vector of x-xoordinates corresponding to U points on the PPG time series)
  # uY (A vector of y-xoordinates corresponding to U points      "       "        )
  # vX (A vector of x-xoordinates corresponding to V points      "       "        )
  # vY (A vector of y-xoordinates corresponding to V points      "       "        )
  ########################################################################################################################################

  wHalfHeight <- predict(d1p, wx)/2                                                           # For each w point, identify the half maximum on first derivative
  halfHeightX <- c()
  halfHeightY <- c()
  for(i in 1:length(wHalfHeight)){                                                            # For each 1st derivative peak, create a polynomial spline of the peak only
    d1PeakSub <- CubicInterpSplineAsPiecePoly((round(wx[i])-(sr/8)):(round(wx[i])+(sr/8)),
                 d1[(round(wx[i])-(sr/8)):(round(wx[i])+(sr/8))], "natural")
    preHalfHeights <- solve(d1PeakSub, b = wHalfHeight[i])                                    # Identify the x-coordinates of the new spline when the y-value = the half maximum

    if(length(preHalfHeights) < 2){                                                           # If only one coordinate is found, extend the length of the spline,
      d1PeakSub <- CubicInterpSplineAsPiecePoly((round(wx[i])-(sr/4)):(round(wx[i])+(sr/4)),          # and search again
                   d1[(round(wx[i])-(sr/4)):(round(wx[i])+(sr/4))], "natural")
      preHalfHeights <- solve(d1PeakSub, b = wHalfHeight[i])
    }

    if(length(preHalfHeights) > 2){                                                           # More than one x-coordinate is sometimes found if the gradient of the upstroke
      a <- preHalfHeights - wx[i]                                                             # is particularly variable
      b <- c()                                                                                # In such cases, find the time (x-axis) difference between each detected half height and w
      for(j in 1:(length(a)-1)){                                                              # Select the two values with the most similar difference before and after w
        b[j] <- a[j] - a[j+1]
      }
      b[length(b) + 1] <- a[1] - a[length(a)]
      c <- which(abs(b) == min(abs(b)))
      if(c == length(b)){
        preHalfHeights <- c(preHalfHeights[c], preHalfHeights[1])
      }else{
        preHalfHeights <- c(preHalfHeights[c], preHalfHeights[c+1])
      }
    }
    halfHeightX[c((2*(i)-1), (2*(i)))] <- preHalfHeights                                      # Add the two values per peak to a vector of all in the time series
    halfHeightY[c((2*(i)-1), (2*(i)))] <- predict(d1PeakSub,                                  # And identify the corresponding y-coordinates for the (first derivative) times series
                                          halfHeightX[c((2*(i)-1), (2*(i)))])
  }

  if(plot){                                                                                   # Check results
    plot(d1p)
    points(halfHeightX, halfHeightY, pch = 19)
  }

  uX <- halfHeightX[seq_along(halfHeightX) %%2 != 0]                                          # Split the time series values into vectors for U and V
  vX <- halfHeightX[seq_along(halfHeightX) %%2 == 0]

  uY <- c()                                                                                   # Find the y-coordinates for U and V on the original PPG time series
  for(i in 1:length(uX)){
    uY[i] <- predict(spline, uX[i])
  }
  vY <- c()
  for(i in 1:length(vX)){
    vY[i] <- predict(spline, vX[i])
  }

  df <- data.frame(uX, uY, vX, vY)                                                            # Organize outputs (see summary box above)
  return(df)
}


find_o <- function(wx, inx, iny, d1p, sp){
  ########################################################################################################################################
  # FindO identifies the origin of the systolic peak for each beat. In the absence of a clear inflection point at the origin, O points are
  # derived from inflection points in the first derivative.

  # Inputs:
  # wx (A vector of x-coordinates corresponding to w points on the PPG time series)
  # inx (A vector of all inflection point x-coordinates in the PPG time series)
  # iny (A vector of all inflection point y-coordinates in the PPG time series)
  # d1p (The first derivative time series, polynomial spline form)
  # sp (The original PPG time series, polynomial spline form)

  # Outputs:
  # o (vector of O points)
  # inx (vector of updated inflection point x-coordinates)
  # iny (vector of updated inflection point y-coordinates)
  ########################################################################################################################################

  o <- c()                                                                                    # Create vector to store O values
  for(i in 1:length(wx)){
    o[i] <- max(which(inx < wx[i]))                                                           # Identify O points as the those inflection points that
  }                                                                                           # immediately precede w points.

  inflexD1 <- solve(d1p, b = 0, deriv = 1)                                                    # Find inflection points on the first derivative
  inflexD1y <- predict(d1p, inflexD1)
  o2 <- c()                                                                                   # For each beat, find the inflection point before w that is
  for(i in 1:length(wx)){                                                                     # also below 0 (points above 0 tend to be spurious)
    o2[i] <- max(which(inflexD1 < wx[i] & inflexD1y < 0))
  }

  for(i in 1:length(wx)){                                                                     # For each beat, replace the originally identified O with
    if((inx[o][i] - inflexD1[o2][i]) < 0){                                                    # the O derived from the first derivative if the latter
      inx[o][i] <- inflexD1[o2][i]                                                            # is closer to w in time (this occurs when there is no
      iny[o][i] <- predict(sp, inx[o][i])                                                     # inflection point at the origin of the original PPG signal)
      }
  }

  tmp <- list(inx, iny, o)                                                                    # return the o points, as well as the inflection points
  return(tmp)                                                                                 # which have been repositioned due to the above replacement
}


preclean_wuv <- function(w, uv, o, samp, sp, q = FALSE){
  ########################################################################################################################################
  # PreClean takes u, v and w values and assesses the x-axis (time) difference between them for each beat. For normal beats, time from
  # u to w is around half (50%) of the time from u to v. Beats with abnormal / artefactual systolic upstrokes tend to have outlying
  # values for this measure. Thus PreClean can identify and remove them.

  # Inputs:
  # w (vector of all x-axis coordinates corresponding to w points)
  # uv (a dataframe containing x and y coordinates for all u and v values)
  # o (vector of O points)
  # samp (sample rate)
  # sp (The original PPG time series, discrete form)
  # q (Logical, will pause function and give the option of plotting rejected beats)

  # Outputs:
  # w (as above, with aberrant beats removed)
  # uv (              "       "             )
  # o (               "       "             )
  ########################################################################################################################################

  vDist <- c()                                                                                # For each wave, find the timing of w relative
  for(i in 1:length(w$wX)){                                                                   # to u and v, expressed as a percentage
    vDist[i] <- (w$wX[i] - uv$uX[i]) / (uv$vX[i] - uv$uX[i])*100
  }
  # plot(w$wX, vDist, type = "l")                                                             # This value may be of interest in itself (see plot)

  sdpdtv <- sd(vDist)                                                                         # Make a vector of all beats where this value is abnormal,
  pdtvWaves <- c(which(vDist > (sdpdtv + median(vDist)) & vDist > 70),                        # either by being greater than one standard deviation
                 which(vDist < (median(vDist) - sdpdtv) & vDist < 30))                        # from the median, or having an absolute value < 30 or > 70

  if(length(pdtvWaves) > 0){
    cat("\n", length(pdtvWaves),
        "waves removed due to abnormal distances between u, v and w" )
    if(q == TRUE){                                                                            # If q = T, enter debug / plot routine
      plotyyy <- 0
      while(plotyyy == 0){
        plotyy <- readline(prompt = "Would you like to view? (enter yes or no)")
        if(plotyy == "yes"){
          for(i in 1:length(pdtvWaves)){
            if(pdtvWaves[i] == 1){
              plot(1:(samp*10), sp[1:(samp*10)], type = "l")
              points(w$wX, w$wY)
              points(w$wX[pdtvWaves[i]], w$wY[pdtvWaves[i]], pch =19, col = 'red')
              points(uv$uX, uv$uY)
              points(uv$vX, uv$vY)
            }else{
              plot((w$wX[pdtvWaves[i]]-samp*5):(w$wX[pdtvWaves[i]]+samp*5),
                   sp[(w$wX[pdtvWaves[i]]-samp*5):(w$wX[pdtvWaves[i]]+samp*5)], type = "l")
              points(w$wX, w$wY)
              points(w$wX[pdtvWaves[i]], w$wY[pdtvWaves[i]], pch =19, col = 'red')
              points(uv$uX, uv$uY)
              points(uv$vX, uv$vY)
            }
          }
          plotyyy <- 1
        }
        if(plotyy == "no"){
          cat("\n", "ok")
          plotyyy <- 1
        }
        if(plotyy != "yes" & plotyy != "no"){cat("\n", "please enter 'yes' or 'no'")}
      }
    }
    w <- w[-pdtvWaves, ]                                                                      # Remove beats with abnormal values from all relevant vectors
    uv <- uv[-pdtvWaves, ]
    o <- o[-pdtvWaves]
  }
  return(list(w, uv, o))
}


baseline <- function(inx, iny, o, dat, sp, plot = FALSE){
  ########################################################################################################################################
  # RemoveBaselineDrift generates a spline to fit the wandering baseline of the PPG time series and subtracts it from the time series.
  # Given baseline drift is indicative of vasomotion and respiratory modulation, it may be of interest to extract the spline.

  # Inputs:
  # inx (A vector of all inflection point x-coordinates in the PPG time series)
  # iny (A vector of all inflection point y-coordinates in the PPG time series)
  # o (A vector of all O points)
  # dat (The original PPG time series (preprocessed))
  # sp (The original PPG time series, polynomial spline form)
  # plot (Logical, plots baseline spline and the original PPG time series)

  # Outputs:
  # baseCor (time series with baseline wander removed)
  ########################################################################################################################################

  sfunction2 <- splinefun(inx[o], iny[o], method = "natural")                                 # Create a spline from the O points through
  splineBase <- sfunction2(seq(1, length(dat)), deriv = 0)                                    # the time series

  if(plot){                                                                                   # Plot the baseline spline and time series
    plot(sp)
    points(inx[o], iny[o], pch = 19)
    lines(splineBase)
  }

  baseCor <- dat - splineBase                                                                 # Subtract the baseline spline from the time
  if(plot){                                                                                   # series, thereby removing baseline drift
    plot(baseCor, type = "l")
    lines(1:length(dat), seq(from = 0, to = 0, length.out = length(dat)))                     # Plot new baseline (y = 0)
  }

  return(baseCor)
}


clean_wuv <- function(wuv, sp, inx, o, samp, bc, q = FALSE){
  ########################################################################################################################################
  # CleanWuv first identifies erroneous beats and removes them, by identifying abnormal values of U and V, and the y-axis difference
  # between them. It then calculates O-O intervals and inter-beat intervals, and removes beats preceding large intervals.

  # Inputs:
  # wuv (a dataframe of all w, u and v values corresponding to detected and pre-cleaned beats in the time series)
  # sp (the baseline-corrected PPG time series, polynomial spline form)
  # inx (a vector of all inflection point x-coordinates in the PPG time series)
  # o (a vector of all O points)
  # samp (sampling rate)
  # bc (the baseline-corrected PPG time series, discrete form)
  # q (logical, will pause function and give the option of plotting rejected beats)

  # Outputs:
  # d (a dataframe combining the following three structures):
    # wuv (the inputted dataframe of w, u and v values, with rows corresponding to erroneous beats removed)
    # diffVU (a vector of y-axis differences between u and v for each beat - for scaling purposes)
    # o2 (a vector of O points, with those corresponding to erroneous beats removed)
  # ibi (a vector of all inter-beat intervals (x-axis differences between sucessive W points))
  # oDiff (a vector of all O-O intervals (x-axis differences between successive O points))
  ########################################################################################################################################

  o2 <- o                                                                                     # Define a second vector for O from which
                                                                                              # rejected waves are to be removed

  falseU <- c()                                                                               # Identify U values that are implausibly far
  for(i in 1:(nrow(wuv)-1)){                                                                  # from baseline
    if(wuv$uY[i] > (median(wuv$uY) + 2*std(wuv$uY)) & wuv$uY[i] > 1 &
       wuv$uY[i] > wuv$uY[i+1]*2 | wuv$uY[i] < -50){
      falseU[i] <- i
    }
  }
  if(length(falseU) > 0){
    falseU <- falseU[!is.na(falseU)]
    cat("\n", length(falseU), "/", nrow(wuv),
        "waves removed due to U having an abnormally high y-value relative to baseline")
    if(q == TRUE){                                                                            # Optional debug mode for plotting waves
      plotyyy <- 0                                                                            # with erroneous U values
      while(plotyyy == 0){
        plotyy <- readline(prompt = "Would you like to view? (enter yes or no)")
        if(plotyy == "yes"){
          for(i in 1:length(falseU)){
            plot((wuv$wX[falseU[i]]-samp*2):(wuv$wX[falseU[i]]+samp*2),
                 bc[(wuv$wX[falseU[i]]-samp*2):(wuv$wX[falseU[i]]+samp*2)], type = "l")
            points(wuv$uX[falseU[i]], wuv$uY[falseU[i]], pch = 19)
            points(wuv$uX[falseU[i]-1], wuv$uY[falseU[i]-1])
            points(wuv$uX[falseU[i]+1], wuv$uY[falseU[i]+1])
          }
          plotyyy <- 1
        }
        if(plotyy == "no"){
          cat("\n", "ok")
          plotyyy <- 1
        }
        if(plotyy != "yes" & plotyy != "no"){cat("\n", "please enter 'yes' or 'no'")}
      }
    }
    if(length(falseU) > 0){
      o2 <- o2[-falseU]                                                                       # Remove erroneous U values
      wuv <- wuv[-falseU, ]
    }
  }

  diffVU <- wuv$vY - wuv$uY                                                                   # Find y-axis v-u difference (scale factor) for each wave
  if(length(which(diffVU == 0)) > 0){                                                         # Remove waves where the above difference is
    dup <- which(diffVU == 0)                                                                 # 0 (indicating an artefact)
    wuv <- wuv[-dup, ]
    diffVU <- diffVU[-dup]
    cat(length(which(diffVU == 0)), "/", nrow(wuv),
        "waves removed due to scale factors of 0 (u and v incorrectly identified)")
  }

  falseScale <- which(diffVU < (median(diffVU) - 5*(std(diffVU))))                            # Identify beats where the v-u difference is abnormally small
  if(length(falseScale) > 0){                                                                 # and remove them (with optional debug mode as above)
    cat("/n", length(falseScale), "/", nrow(wuv),
        "waves removed due to scale factors of 0 (u and v incorrectly identified)")
    if(q == TRUE){
      plotyyy <- 0
      while(plotyyy == 0){
        plotyy <- readline(prompt = "Would you like to view? (enter yes or no)")
        if(plotyy == "yes"){
          for(i in 1:length(falseScale)){
            plot((wuv$wX[falseScale[i]]-samp*2):(wuv$wX[falseScale[i]]+samp*2),
               bc[(wuv$wX[falseScale[i]]-samp*2):(wuv$wX[falseScale[i]]+samp*2)], type = "l")
            points(wuv$uX[falseScale[i]], wuv$uY[falseScale[i]])
            points(wuv$vX[falseScale[i]], wuv$vY[falseScale[i]])
          }
          plotyyy <- 1
        }
        if(plotyy == "no"){
          cat("\n", "ok")
          plotyyy <- 1
        }
        if(plotyy != "yes" & plotyy != "no"){cat("\n", "please enter 'yes' or 'no'")}
      }
    }
    if(length(falseScale) > 0){
      o2 <- o2[-falseScale]
      wuv <- wuv[-falseScale, ]
      diffVU <- diffVU[-falseScale]
    }
  }

  oY <- predict(sp, inx[o])                                                                   # Identify O y-values on the original PPG trace
  owDiff <- c()                                                                               # Find the x-axis difference between O and W
  for(i in 1:length(wuv$wX)){
    owDiff[i] <- wuv$wX[i] - inx[o[i]]
  }

  oDiff <- c()                                                                                # Find the x-axis difference between successive o_points
  for(i in 1:(length(inx[o])-1)){
    oDiff[i] <- inx[o[i+1]] - inx[o[i]]
  }

  ibi <- c()                                                                                  # Find the x-axis difference between successive W points,
  for(i in 1:(length(wuv$wX))-1){                                                             # and designate as inter-beat intervals
    ibi[i] <- wuv$wX[i+1] - wuv$wX[i]
  }

  o2 <- o2[-length(o2)]                                                                       # Remove the last W (in case final beat is incomplete)
  wuv <- wuv[-nrow(wuv), ]
  diffVU <- diffVU[-length(diffVU)]

  # plot(ibi)                                                                                 # Identify abnormal inter-beat intervals indicative of
  # points(which(ibi > 1.25*median(ibi)),                                                     # an extended time between beats (e.g due to artefact)
  # ibi[which(ibi > 1.25*median(ibi))], pch = 19, col = "red")                                # These can be plotted (see left)

  endWaves <- c()                                                                             # Abnormal inter-beat intervals are defined as greater
  for(i in 2:(length(ibi)-1)){                                                                # than the median interval by a factor of 1.3, and
    if(ibi[i] > 1.3*median(ibi) & ( ibi[i] > 2*ibi[i-1] | ibi[i] > 2*ibi[i+1])){              # greater than at least one of the values adjacent
      endWaves[i] <- i                                                                        # to it by a factor of 2
    }
  }                                                                                           # Beats preceding abnormal intervals are removed as they
  if(length(endWaves) > 0){                                                                   # will not have an O point to mark their end
    endWaves <- endWaves[!is.na(endWaves)]
    cat("\n", length(endWaves), "/", nrow(wuv),
        "waves removed due to the end of the wave (next o point)
         not being corrected for baseline")
    if(q == TRUE){
      plotyyy <- 0
      while(plotyyy == 0){
        plotyy <- readline(prompt = "Would you like to view? (enter yes or no)")
        if(plotyy == "yes"){
          for(i in 1:length(endWaves)){
            if(endWaves[i] == 1){
              plot(1:(samp*10), bc[1:(samp*10)], type = "l")
              points(wuv$wX[endWaves[i]], wuv$wY[endWaves[i]], pch =19, col = 'red')
            }else{
              plot((wuv$wX[endWaves[i]]-samp*5):(wuv$wX[endWaves[i]]+samp*5),
                   bc[(wuv$wX[endWaves[i]]-samp*5):(wuv$wX[endWaves[i]]+samp*5)], type = "l")
              points(wuv$wX[endWaves[i]], wuv$wY[endWaves[i]], pch =19, col = 'red')
              points(wuv$wX[endWaves[i]+1], wuv$wY[endWaves[i]+1], pch = 19, col = 'red')
              points(wuv$wX, wuv$wY)
            }
          }
          plotyyy <- 1
        }
        if(plotyy == "no"){
          cat("\n", "ok")
          plotyyy <- 1
        }
        if(plotyy != "yes" & plotyy != "no"){cat("\n", "please enter 'yes' or 'no'")}
      }
    }
    if(length(endWaves) > 0){
      o2 <- o2[-endWaves]
      wuv <- wuv[-endWaves, ]
      diffVU <- diffVU[-endWaves]
    }
  }
  d <- cbind(wuv, diffVU, o2)                                                                 # Bind vectors with element number corresponding to beat number
  dat <- list(d, ibi, oDiff)                                                                  # into a single dataframe before outputting
  return(dat)
}


sep_beats <- function(odiff, bc, samp, wuv, wvlen, inx, o, ibi, scale = TRUE, q = FALSE, subset = FALSE, boundaries = NULL){

  # Optional subsetting of Iso Waves:
  if(subset == T){
    #Find the Iso specific part of the data
    ab_ibi <- which(ibi > mean(ibi) + (4*sd(ibi)) | ibi < mean(ibi) - (4*sd(ibi))) #excluding ibi points 4 sds above mean
    ibi[ab_ibi] <- NA
    ibi <- ibi[!is.na(ibi)]
    meds <- rollmedian(ibi, k = 19) #rolling median
    basemed <- mean(meds[1:50]) #baseline defined as the average of the first 50 rolling median points... I'm not sure if this is sensible
    halfs <- (basemed - min(meds))/2 #half diff between minimum and baseline
    post <- min(meds) + halfs   # the half way point (the y axis point that represents the half height)
    # Sometimes the minimum is incorrect (due to outlier ibi values):
    # Suggest first finding the minimum of meds and then finding a minimum ibi local to that (+/- 10 beats):
    meds_roi <- (which(meds == min(meds))[1] - 10) : (which(meds == min(meds))[1] + 10)
    # If no discernable minimum in the time series, it is possible a value far from the middle would be included, which would introduce negative values to meds_roi
    if( sum(which(meds_roi < 1)) > 0  | sum(which(meds_roi > length(ibi))) > 0 ){
      message("Warning: No discernable minimum in 2mg trace")
      Sys.sleep(10)
      meds_roi <- (round(length(ibi)/2) - 10) : (round(length(ibi)/2) + 10)
    }
    indx <- meds_roi[which(ibi[meds_roi] == min(ibi[meds_roi]))]    # indx is taken as the minimum
    # Check again that minimum is not too near to beginning / end of trace:
    if(indx < 15 | indx > (length(ibi) - 15)){
      message("Warning: No discernable minimum in 2mg trace")
      Sys.sleep(10)
      meds_roi <- (round(length(ibi)/2) - 10) : (round(length(ibi)/2) + 10)
    }
    pre_indx <- which(abs(ibi[1:indx] - post) < 0.5) + 0 # finds the values that are closest to the half point  # To ensure pre_indx is not < 75, have change ibi[1:indx] to ibi[75:indx], and 0 to 74...
    # If no values within 0.5 of the half-way point, extend the range to 1.5 and so on:
    a <- 0.5
    while(length(pre_indx) < 1){
      a <- a + 1
      pre_indx <- which(abs(ibi[1:indx] - post) < a) + 0 # Have to adjust for the fact that 1 = 75 now
    }
    in2 <- which(abs(pre_indx - indx) == min(abs(pre_indx - indx))) # take the one closest to the minimum
    pre_indx <- pre_indx[in2]

    # Find the closest to the half point from the points after the minimum
    post_indx <- indx + which(abs(ibi[indx:length(ibi)] - post) < 0.5)   # changed length(ibi) to 210
    a <- 0.5
    while(length(post_indx) < 1){
      a <- a + 0.25
      post_indx <- indx + which(abs(ibi[indx:length(ibi)] - post) < a)  # changed length(ibi) to 210
    }
    while(post_indx[1] < indx + 15){
      post_indx <- post_indx[-1]
    }
    post_indx <- post_indx[1]
    rm(meds, basemed, halfs, post, indx, in2)
    ppg_pre <- (which((ppg[,1]) == beat[,1][pre_indx - 1]))    # Before infusion
    ppg_post <- which((ppg[,1]) == beat[,1][post_indx + 1])    # After infusion wears off
    # Sometimes beat[, 1] will not contain the values specified to subset, in which case...
    if(length(ppg_pre) < 1 | length(ppg_post) < 1){
      message("Subsetting failed: review of time series suggested")
      Sys.sleep(10)
      ppg_pre <- which((ppg[,1]) == beat[1,1])
      ppg_post <- which((ppg[,1]) == beat[nrow(beat),1])
    }

    # You might have to re-derive pre and post indx from ppg pre and post
    subs <- which(wuv$wX > ppg_pre & wuv$wX < ppg_post)
    cat(length(subs), "beats in subset (pre-cleaning)")
    #Sys.sleep(2)

    # Subset:
    wuv <- wuv[subs, ]
    # Subset ibi:
    ibi <- ibi[subs]
  }

  if(subset == "rep"){

    # Subset according to previous time series:
    subs <- which(wuv$wX > boundaries[1])
    subs_pre <- subs[1:boundaries[2]]
    # There's a chance there aren't enough beats remaining in the placebo time series, in which case take as many as possible:
    tmp <- sum(is.na(subs_pre))
    if(tmp > 0){
      subs <- subs[1:(length(subs_pre)-tmp)]
      boundaries[2] <- length(subs)
    }else{
      subs <- subs[1:boundaries[2]]
    }

    # Subset:
    wuv <- wuv[subs, ]
    # Subset:
    ibi <- ibi[subs]
  }


  # Redefine baseline corrected data:
  sourcedata <- bc[1:length(undetrended)]

  # Define a dataframe to contain individual waves (first column is the x-axis (in seconds) - currently set for bioradio data):
  pulse <- data.frame(seq((-141/(samp*10)), ((wvlen*15 -9)-142)/(samp*10), by = 1/(samp*10)))

  afterO <- list()
  beforeO <- list()
  extra_long_wave <- c()
  for(i in 1:length(wuv$wX)){

    splPolySub <- CubicInterpSplineAsPiecePoly((round(wuv$uX[i])-15):(round(wuv$uX[i]) + (wvlen+10)), sourcedata[(round(wuv$uX[i])-15):(round(wuv$uX[i]) + (wvlen+10))], "natural")
    # Turn into discrete form
    splSub <- predict(splPolySub, c(seq((wuv$uX[i]-14), (wuv$uX[i]+(wvlen+5)), 0.1)))
    # Make into dataframe:
    splSub <-  as.data.frame(splSub)
    splSub <- cbind(splSub, c(seq((wuv$uX[i]-14), (wuv$uX[i]+(wvlen+5)), 0.1)))
    colnames(splSub) <- c('y', 'x')
    # Scale so that v-u = 1
    if(scale == TRUE){
      splSub$y <- splSub$y/(wuv$diffVU[i])
    }
    # Adjust such that u = 0, v = 1 on y-axis
    yDiff <- splSub$y[141]
    splSub$y <- splSub$y - yDiff

    if(scale == TRUE){
      # Find the x-value for each wave that corresponds to when it = 0.5 in height (this requires making a spline):
      splPolySub2 <- CubicInterpSplineAsPiecePoly(splSub$x, splSub$y, "natural")
      halfCross <- solve(splPolySub2, b = 0.5, deriv = 0)
      halfCross <- halfCross[which(abs(halfCross - wuv$wX[i]) == min(abs(halfCross - wuv$wX[i])))]

      # Convert to discrete form again: (need to redefine splSub)
      splSub2 <- predict(splPolySub, c(seq((halfCross-14), (halfCross+(wvlen+5)), 0.1)))
      splSub2 <-  as.data.frame(splSub2)
      splSub2 <- cbind(splSub2, c(seq((halfCross-14), (halfCross+(wvlen+5)), 0.1)))
      colnames(splSub2) <- c('y', 'x')

      # Scale again
      splSub2$y <- splSub2$y/(wuv$diffVU[i])
      # Adjust y-axis such that u = 0, v = 1
      yDiff <- wuv$uY[i] / wuv$diffVU[i]
      splSub2$y <- splSub2$y - yDiff
    }else{
      splSub2 <- splSub
    }

    # Find next_o
    afterO[[i]] <- which(splSub2$x > inx[o][min(which(inx[o] > wuv$wX[i]))])

    # Find values before the o of the wave itself
    beforeO[[i]] <- which(splSub2$x < inx[o][max(which(inx[o] < wuv$wX[i]))])

    # Correct such that x column and wave column are correctly aligned
    splSub3 <- c()
    for(j in 1:nrow(splSub2)){
      splSub3[j+1] <- splSub2$y[j]
    }

    # If splSub3 and nrow(pulse) are the same length, you need only adjust afterO
    if(length(splSub3) == nrow(pulse)){
      if(length(afterO[[i]]) > 0){
        diff2 <- abs(length(splSub3) - max(afterO[[i]]))
        for(j in 1:diff2){
          afterO[[i]] <- c(afterO[[i]], (max(afterO[[i]]) + 1))
        }
      }
    }

    # Or
    # Adjust such that splSub3 is the same length as pulse
    if(length(splSub3) > nrow(pulse)){
      diff <- length(splSub3) - nrow(pulse)
      len <- length(splSub3)
      splSub3 <- splSub3[-((len - (diff-1)):len)]
      if(diff > 1){     # must correct the afterO values so that they also do not contain values beyond the length of splSub3 (include case where length of afterO[[i]] is one so the code works...)
        if(length(afterO[[i]]) > 1 ){
          afterO[[i]] <- afterO[[i]][-(which(afterO[[i]] > length(splSub3)))]  #afterO[[i]][1:(which(afterO[[i]] == (len - (diff-1))) - 1)
        }else{
          afterO[[i]] <- afterO[[i]][-(which(afterO[[i]] > length(splSub3)))]
        }
      }
    }


    if(length(splSub3) < nrow(pulse)){
      diff <- nrow(pulse) - length(splSub3)  # diff = how much longer pulse is than splSub3...
      splSub3 <- c(splSub3, rep(NA, diff))   # The extra rows in pulse are thus made up by NAs
      if(length(afterO[[i]]) > 0){
        diff2 <- length(splSub3) - max(afterO[[i]])   # This = how many elements at the end of splSub3 are after O.
        # This for loop adds values to after_o to make up the difference
        for(j in 1:diff2){
          afterO[[i]] <- c(afterO[[i]], (max(afterO[[i]]) + 1))
        }
      }
    }

    # Add column to dataframe
    pulse <- cbind(pulse, splSub3)
  }

  for(i in 1:(ncol(pulse) -1)){
    colnames(pulse)[i+1] <- paste("wave", i, sep = "_")
  }
  colnames(pulse)[1] <- "x"

  # Remove any values after O for each wave:
  for(i in 2:(ncol(pulse))){
    pulse[, i][afterO[[(i-1)]][-1]] <- NA
  }

  # Remove values before O before each wave:
  for(i in 2:(ncol(pulse))){
    pulse[, i][beforeO[[(i-1)]][-1]] <- NA
  }

  # Remove any extra long waves (i.e where a distance of 2 Os has been counted as one wave):
  # find average length of waves (without NAs) in pulse
  wavelengths <- c()
  for(i in 2:ncol(pulse)){
    wavelengths[i] <- length(pulse[, i][!is.na(pulse[, i])])
  }
  wavelengths <- wavelengths[!is.na(wavelengths)]


  ########## Cleaning: #########

  extra_long_wave <- c()
  for(i in 2:length(wavelengths)){
    if(wavelengths[i] > (mean(wavelengths) + sd(wavelengths)) & wavelengths[i] > 1.8*(wavelengths[i-1]) ){
      extra_long_wave[i] <- i
    }
  }
    extra_long_wave <- extra_long_wave[!is.na(extra_long_wave)]
    if(length(extra_long_wave) > 0){
      #rejected_waves <- rejected_waves
      cat("\n", length(extra_long_wave), "/", (ncol(pulse)-1), "waves removed for being abnormally long")
      if(q == TRUE){
        plotyyy <- 0
        while(plotyyy == 0){
          plotyy <- readline(prompt = "Would you like to view? (enter yes or no)")
          if(plotyy == "yes"){
            for(i in 1:length(extra_long_wave)){
              plot((wuv$wX[extra_long_wave[i]]-samp*2):(wuv$wX[extra_long_wave[i]]+samp*2), bc[(wuv$wX[extra_long_wave[i]]-samp*2):(wuv$wX[extra_long_wave[i]]+samp*2)], type = "l")
              points(wuv$wX[extra_long_wave[i]], wuv$wY[extra_long_wave[i]], pch =19, col = 'red')
            }
            plotyyy <- 1
          }
          if(plotyy == "no"){