#Packages Chunk
library(psych)
## Warning: package 'psych' was built under R version 3.2.5
# Load the session-level data set.
# read.csv() already returns a data.frame, so no extra data.frame() wrapper is
# needed. Factors are requested explicitly: the usage tables built later rely
# on ID being a factor (participants with zero sessions must still appear),
# and since R 4.0.0 read.csv() no longer converts strings to factors by default.
vr_data <- read.csv("VR_Data2.csv", stringsAsFactors = TRUE)
str(vr_data)
## 'data.frame':    221 obs. of  9 variables:
##  $ ID      : Factor w/ 38 levels "gw1501","gw1530",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ hr      : num  74.5 73.5 85.5 78.1 73.8 ...
##  $ pt      : num  94.1 94.7 94.8 91.1 80.7 ...
##  $ sc      : num  3.71 3.74 2.22 1.9 1.8 ...
##  $ delta_hr: num  -13.13 -17.93 -2.09 -15.18 -11.67 ...
##  $ delta_pt: num  -2.15 0.11 -2.52 -1.87 -3.2 -3.99 -6.28 -7.72 -4.57 -0.25 ...
##  $ delta_sc: num  1.98 0.77 0.075 0.44 0.26 ...
##  $ time    : num  417 178 248 174 185 ...
##  $ UserType: Factor w/ 2 levels "mobile","vr": 2 2 2 2 2 2 2 2 2 2 ...
# Pin the level order of UserType: "mobile" is level 1 and "vr" is level 2.
# (Labels identical to the levels are the default, so they are not repeated.)
vr_data$UserType <- factor(vr_data$UserType, levels = c("mobile", "vr"))

# Draw the overall boxplot of heart-rate change and keep only its outliers.
# Wrapping the entire boxplot() result in data.frame() works only when the
# number of outliers happens to be a multiple of the 5-row `stats` and 2-row
# `conf` components (they are silently recycled); otherwise data.frame() stops
# with "arguments imply differing number of rows". `out` and `group` always
# have matching lengths, so only those two components are kept.
vr_out <- data.frame(boxplot(vr_data$delta_hr)[c("out", "group")])

dim(vr_out)
## [1] 20  2
# View() needs an interactive session; skip it when knitting or using Rscript.
if (interactive()) {
  View(vr_out)
}
#The goal of this research is to identify whether those who used the VR version of a meditation treatment during a break period experienced greater reductions in heartrate readings during the treatment than those who used a mobile version of the paced breathing exercise. 

#H1 Significant differences are present in terms of heartrate reduction for those who used the VR when compared to the mobile app for paced breathing exercises. 

#H0 No significant differences are present in terms of heartrate reduction for those who used the VR when compared to the mobile app for paced breathing exercises. 

#First let's just look at some basic stats between the two groups and then we are going to use a simple t-test to compare group average differences in heartrate. 

#Start by splitting the dataframe just for ease of analysis. 
#Split our data.frame, seems to work better when we pull out the splitting term from the original data.frame
# Grouping factor that assigns every session to its device type.
split <- as.factor(vr_data[["UserType"]])
# Partition vr_data into a named list holding one data.frame per UserType level.
split_vr_data <- split(x = vr_data, f = split)

# Pull each group's data.frame out of the list by its level name.
mobile <- data.frame(split_vr_data[["mobile"]])
vr <- data.frame(split_vr_data[["vr"]])
str(vr)
## 'data.frame':    123 obs. of  9 variables:
##  $ ID      : Factor w/ 38 levels "gw1501","gw1530",..: 1 1 1 1 1 1 1 2 2 2 ...
##  $ hr      : num  74.5 73.5 85.5 78.1 73.8 ...
##  $ pt      : num  94.1 94.7 94.8 91.1 80.7 ...
##  $ sc      : num  3.71 3.74 2.22 1.9 1.8 ...
##  $ delta_hr: num  -13.13 -17.93 -2.09 -15.18 -11.67 ...
##  $ delta_pt: num  -2.15 0.11 -2.52 -1.87 -3.2 -3.99 -6.28 -7.72 -4.57 -0.25 ...
##  $ delta_sc: num  1.98 0.77 0.075 0.44 0.26 ...
##  $ time    : num  417 178 248 174 185 ...
##  $ UserType: Factor w/ 2 levels "mobile","vr": 2 2 2 2 2 2 2 2 2 2 ...
# View() needs an interactive session; skip it when knitting or using Rscript.
if (interactive()) {
  View(mobile)
}
# Structure of the mobile subset.
str(mobile)
## 'data.frame':    98 obs. of  9 variables:
##  $ ID      : Factor w/ 38 levels "gw1501","gw1530",..: 2 3 3 3 5 6 6 6 7 7 ...
##  $ hr      : num  55.7 66 60.2 81.5 65.9 ...
##  $ pt      : num  83 78.7 83.2 85.1 94.4 ...
##  $ sc      : num  5.97 1.72 2.32 1.59 6.13 ...
##  $ delta_hr: num  7.65 90.93 -2.55 31.21 -2.25 ...
##  $ delta_pt: num  0.07 -4.08 -0.84 -2.32 -0.28 ...
##  $ delta_sc: num  7.3 -2.07 -0.11 0 -0.82 ...
##  $ time    : num  209.1 342.7 74.2 116.5 72 ...
##  $ UserType: Factor w/ 2 levels "mobile","vr": 1 1 1 1 1 1 1 1 1 1 ...
# Total heart-rate change summed over all mobile sessions.
sum(mobile[["delta_hr"]])
## [1] 564.495
# Row and column counts of the mobile subset.
c(nrow(mobile), ncol(mobile))
## [1] 98  9
# Descriptive statistics for every column of the VR subset.
describe(x = vr)
##           vars   n   mean    sd median trimmed   mad    min    max  range
## ID*          1 123  20.09 12.22  20.00   20.26 14.83   1.00  38.00  37.00
## hr           2 123  73.95 12.00  73.11   72.96 11.68  52.46 108.99  56.53
## pt           3 123  86.10  6.06  86.96   86.42  6.86  70.10  95.16  25.05
## sc           4 123   3.18  1.63   2.99    3.04  1.56   0.00  11.82  11.82
## delta_hr     5 123  -1.66 25.90  -2.46   -2.51 16.23 -67.12 132.57 199.70
## delta_pt     6 123  -1.04  2.45  -0.28   -0.65  1.19 -14.11   4.16  18.27
## delta_sc     7 123  -0.20  1.48  -0.08   -0.09  0.64  -7.04   4.60  11.63
## time         8 123 199.36 51.47 184.57  193.23 26.05  96.16 416.60 320.44
## UserType*    9 123   2.00  0.00   2.00    2.00  0.00   2.00   2.00   0.00
##            skew kurtosis   se
## ID*       -0.11    -1.37 1.10
## hr         0.80     0.49 1.08
## pt        -0.43    -0.86 0.55
## sc         1.54     5.52 0.15
## delta_hr   1.57     7.69 2.34
## delta_pt  -2.06     6.33 0.22
## delta_sc  -1.45     6.37 0.13
## time       1.50     3.28 4.64
## UserType*   NaN      NaN 0.00
# Descriptive statistics for every column of the mobile subset.
describe(x = mobile)
##           vars  n   mean    sd median trimmed   mad    min    max  range
## ID*          1 98  20.16  9.85  22.00   20.38 10.38   2.00  35.00  33.00
## hr           2 98  75.39 14.58  73.08   73.48 10.76  55.57 133.67  78.10
## pt           3 98  84.32  6.30  84.95   84.46  7.52  70.98  95.33  24.34
## sc           4 98   2.79  1.72   2.42    2.63  1.35   0.00   7.99   7.99
## delta_hr     5 98   5.76 26.55   5.34    5.04 15.91 -80.54  90.93 171.47
## delta_pt     6 98  -1.67  3.47  -0.60   -1.03  1.03 -19.53   5.91  25.44
## delta_sc     7 98  -0.36  1.56  -0.24   -0.39  0.49  -4.55   7.30  11.86
## time         8 98 196.45 78.04 186.91  190.04 70.48  71.47 491.12 419.64
## UserType*    9 98   1.00  0.00   1.00    1.00  0.00   1.00   1.00   0.00
##            skew kurtosis   se
## ID*       -0.28    -1.29 1.00
## hr         1.69     3.94 1.47
## pt        -0.14    -1.14 0.64
## sc         0.87     0.53 0.17
## delta_hr   0.38     2.54 2.68
## delta_pt  -2.76     9.63 0.35
## delta_sc   1.14     6.59 0.16
## time       1.02     1.90 7.88
## UserType*   NaN      NaN 0.00
#Just at first glance it looks like on average the VR does reduce the heartrate when compared to the mobile app, which actually appears to increase participants' heartrate on average. Users spent about the same amount of time per session: 199.36 seconds for VR and 196.45 seconds for mobile.

#Also want to see what outliers are present for both groups and remove as necessary 
# Boxplot of heart-rate change for each group; the returned summary objects are
# kept so the flagged outliers can be inspected below.
vr_boxplot_mobile <- boxplot(x = mobile$delta_hr)
vr_boxplot_vr <- boxplot(x = vr$delta_hr)

# Values boxplot() flagged as outliers in the VR group.
vr_boxplot_vr[["out"]]
## [1] 132.575  49.970 115.185 -48.420 -48.430 -53.885 -67.125 -54.040
# Values boxplot() flagged as outliers in the mobile group.
vr_boxplot_mobile[["out"]]
##  [1]  90.930  71.790 -57.760 -80.540 -47.175  49.170  87.280 -47.540
##  [9] -40.335 -42.440  81.640  59.385
#Looks like we've got some really extreme values in the VR dataset so we are going to remove 132.575 and 115.185

#In order to somewhat balance we are also going to remove the two highest elements in the mobile, 90.930 and 87.280

#Below gives us an idea of which users in each group appear to be the major contributors to the data. For mobile, users 2674, 2778, 8243 and 8993 all used the device 7 times or more, with the average being 3.7 times per user that registered at least one usage
# Sessions per participant in the mobile group. When ID is a factor (as in the
# str() output for this data), table() also lists participants with zero
# mobile sessions.
table_mobile_usage <- data.frame(table(mobile$ID))

describe(table_mobile_usage)
##       vars  n  mean    sd median trimmed   mad min max range skew kurtosis
## Var1*    1 38 19.50 11.11   19.5   19.50 14.08   1  38    37 0.00    -1.30
## Freq     2 38  2.58  2.53    3.0    2.28  2.97   0   9     9 0.85    -0.07
##         se
## Var1* 1.80
## Freq  0.41
# View() needs an interactive session; skip it when knitting or using Rscript.
if (interactive()) {
  View(table_mobile_usage)
}
# What we need is the non-zero average, so first count the non-zero entries per
# column; Freq gives the number of participants with at least one session.
colSums(table_mobile_usage != 0)
## Var1 Freq 
##   38   26
# Non-zero count for Freq is 26.

# Average sessions per participant with at least one mobile session. The
# denominator is derived from the data rather than hard-coded as 26, so the
# figure stays correct if the input file changes.
with(table_mobile_usage, mean(Freq[Freq != 0]))
## [1] 3.769231
#average for mobile user is 3.77 times

#Do the same with VR data 
# Sessions per participant in the VR group. ID is a factor here (see the str()
# output below), so participants with zero VR sessions are listed too.
table_vr_usage <- data.frame(table(vr$ID))
str(table_vr_usage)
## 'data.frame':    38 obs. of  2 variables:
##  $ Var1: Factor w/ 38 levels "gw1501","gw1530",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Freq: int  7 7 1 6 4 0 0 0 1 8 ...
# View() needs an interactive session; skip it when knitting or using Rscript.
if (interactive()) {
  View(table_vr_usage)
}
describe(table_vr_usage)
##       vars  n  mean    sd median trimmed   mad min max range skew kurtosis
## Var1*    1 38 19.50 11.11   19.5   19.50 14.08   1  38    37 0.00    -1.30
## Freq     2 38  3.24  3.21    3.0    2.94  4.45   0  13    13 0.81     0.11
##         se
## Var1* 1.80
## Freq  0.52
# Similarly 1501, 1530, 2972, 3039, 8442, 8630 and 9846 all used the VR machine
# more than 7 times; the average usage per active user is computed below.

# What we need is the non-zero average, so first count the non-zero entries per
# column; Freq gives the number of participants with at least one session.
colSums(table_vr_usage != 0)
## Var1 Freq 
##   38   27
# Non-zero count for Freq is 27.

# Average sessions per participant with at least one VR session. The
# denominator is derived from the data rather than hard-coded as 27, so the
# figure stays correct if the input file changes.
with(table_vr_usage, mean(Freq[Freq != 0]))
## [1] 4.555556
#average for VR user is 4.56 times, so we already see that usage for the VR version is almost a full additional usage period higher than mobile (3.77). We will test to see if this is significant later in the analysis 

#Remove the outliers VR dataset removing 132.575 and 115.185, Mobile 90.930 and 87.280 

# TRUE where x equals target within a small tolerance.
# Exact `==` on doubles is fragile, and an NA in x would turn the logical row
# index into NA, which inserts all-NA rows into the subset. NA is therefore
# treated as "not a match" so such rows are kept unchanged.
is_close <- function(x, target, tol = 1e-8) {
  !is.na(x) & abs(x - target) < tol
}

# Drop the two extreme VR readings (132.575 and 115.185), one per step.
vr_1 <- vr[!is_close(vr$delta_hr, 132.575), ]
vr_2 <- vr_1[!is_close(vr_1$delta_hr, 115.185), ]

# Drop the two highest mobile readings (90.930 and 87.280), one per step.
mobile_1 <- mobile[!is_close(mobile$delta_hr, 90.930), ]
mobile_2 <- mobile_1[!is_close(mobile_1$delta_hr, 87.280), ]

# Reset the row numbering before moving forward.
rownames(mobile_2) <- NULL
rownames(vr_2) <- NULL

# View() needs an interactive session; skip it when knitting or using Rscript.
if (interactive()) {
  View(vr_2)
  View(mobile_2)
}

#Ok now we can run some meaningful stats and compare the results

# Descriptive statistics for VR after outlier removal: the mean change in
# heartrate (delta_hr) is -3.74.
describe(x = vr_2)
##           vars   n   mean    sd median trimmed   mad    min    max  range
## ID*          1 121  20.30 12.20  21.00   20.53 14.83   1.00  38.00  37.00
## hr           2 121  73.78 12.02  72.29   72.73 11.29  52.46 108.99  56.53
## pt           3 121  86.21  6.05  87.39   86.55  6.79  70.10  95.16  25.05
## sc           4 121   3.18  1.61   2.99    3.04  1.52   0.00  11.82  11.82
## delta_hr     5 121  -3.74 20.34  -2.77   -3.07 15.74 -67.12  49.97 117.09
## delta_pt     6 121  -1.00  2.39  -0.28   -0.63  1.17 -14.11   4.16  18.27
## delta_sc     7 121  -0.14  1.35  -0.06   -0.06  0.62  -5.78   4.60  10.38
## time         8 121 199.01 50.18 184.57  192.82 25.40  96.16 416.60 320.44
## UserType*    9 121   2.00  0.00   2.00    2.00  0.00   2.00   2.00   0.00
##            skew kurtosis   se
## ID*       -0.13    -1.36 1.11
## hr         0.83     0.56 1.09
## pt        -0.47    -0.81 0.55
## sc         1.61     5.86 0.15
## delta_hr  -0.35     0.68 1.85
## delta_pt  -2.14     7.18 0.22
## delta_sc  -1.00     5.43 0.12
## time       1.58     3.77 4.56
## UserType*   NaN      NaN 0.00
# Descriptive statistics for mobile after outlier removal: the mean change in
# heartrate (delta_hr) is an increase of 4.02, which is not the result we would
# want.
describe(x = mobile_2)
##           vars  n   mean    sd median trimmed   mad    min    max  range
## ID*          1 96  20.33  9.80  23.00   20.54  8.90   2.00  35.00  33.00
## hr           2 96  75.46 14.70  73.08   73.52 11.07  55.57 133.67  78.10
## pt           3 96  84.48  6.25  85.10   84.64  7.39  70.98  95.33  24.34
## sc           4 96   2.78  1.73   2.42    2.62  1.35   0.00   7.99   7.99
## delta_hr     5 96   4.02 23.89   5.07    4.33 15.17 -80.54  81.64 162.18
## delta_pt     6 96  -1.64  3.50  -0.55   -0.97  1.05 -19.53   5.91  25.44
## delta_sc     7 96  -0.30  1.51  -0.22   -0.34  0.50  -4.55   7.30  11.86
## time         8 96 195.32 77.31 186.91  189.05 70.48  71.47 491.12 419.64
## UserType*    9 96   1.00  0.00   1.00    1.00  0.00   1.00   1.00   0.00
##            skew kurtosis   se
## ID*       -0.28    -1.29 1.00
## hr         1.67     3.82 1.50
## pt        -0.17    -1.10 0.64
## sc         0.88     0.56 0.18
## delta_hr  -0.13     2.47 2.44
## delta_pt  -2.78     9.61 0.36
## delta_sc   1.39     7.40 0.15
## time       1.04     2.11 7.89
## UserType*   NaN      NaN 0.00
#We also see that the average time of usage and number of total events are both larger for VR as compared to the Mobile application. 

#Now we will run a simple t-test to compare the change in heartrate and average length of use, to see if significant differences between the two groups are present.

#Better check for normality real quick 
# Visual check of the distribution of heart-rate change in each group.
hist(x = vr_2$delta_hr)  # looks pretty good

hist(x = mobile_2$delta_hr)  # also more or less ok

# One-sided Welch two-sample t-test: is the mean VR heart-rate change less than
# the mean mobile heart-rate change, at the 99% confidence level?
# NOTE(review): IDs repeat across rows, so sessions from the same participant
# may not be independent observations -- confirm this test is appropriate.
t.test(
  x = vr_2$delta_hr,
  y = mobile_2$delta_hr,
  alternative = "less",
  conf.level = 0.99
)
## 
##  Welch Two Sample t-test
## 
## data:  vr_2$delta_hr and mobile_2$delta_hr
## t = -2.5364, df = 186.81, p-value = 0.00601
## alternative hypothesis: true difference in means is less than 0
## 99 percent confidence interval:
##        -Inf -0.5810417
## sample estimates:
## mean of x mean of y 
## -3.737066  4.023802
#Here the p-value of .006 means that, if the true mean heartrate change for VR were not lower than for mobile, a difference in sample means at least this extreme (-3.74 for VR versus 4.02 for mobile) would be expected only about 0.6% of the time. That is below the 1% threshold implied by the 99% confidence level, so we reject H0 and conclude that the VR heartrate change is significantly less than the mobile change. 

```