Loading packages:
library(tidyverse)
library(devtools)
library(epiDisplay)
library(pastecs)
library(reshape2)
library(ggpubr)
Data provided by Spring Health.
data <- readxl::read_xlsx("spring_health_take_home_df.xlsx")
Start by looking at the data.
glimpse(data)
## Rows: 3,269
## Columns: 27
## $ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,…
## $ member_id_hashed <chr> "1b526ea6cb8c6cbb00180df016aacad87a889be78…
## $ assessment_created_at <chr> "2018-11-22T15:28:26Z", "2018-11-22T15:28:…
## $ questionnaire_kind <chr> "SDS", "PHQ9", "SDS", "PHQ9", "SDS", "PHQ9…
## $ questionnaire_created_at <chr> "2018-11-22T15:30:05Z", "2018-11-22T15:30:…
## $ PHQ9_q1 <chr> "N/A", "3", "N/A", "0", "N/A", "1", "N/A",…
## $ PHQ9_q2 <chr> "N/A", "2", "N/A", "1", "N/A", "1", "N/A",…
## $ PHQ9_q3 <chr> "N/A", "1", "N/A", "0", "N/A", "3", "N/A",…
## $ PHQ9_q4 <chr> "N/A", "3", "N/A", "0", "N/A", "2", "N/A",…
## $ PHQ9_q5 <chr> "N/A", "2", "N/A", "2", "N/A", "1", "N/A",…
## $ PHQ9_q6 <chr> "N/A", "1", "N/A", "1", "N/A", "0", "N/A",…
## $ PHQ9_q7 <chr> "N/A", "0", "N/A", "1", "N/A", "0", "N/A",…
## $ PHQ9_q8 <chr> "N/A", "0", "N/A", "1", "N/A", "1", "N/A",…
## $ PHQ9_q9 <chr> "N/A", "0", "N/A", "1", "N/A", "0", "N/A",…
## $ PHQ9_q10 <chr> "N/A", "1", "N/A", "1", "N/A", "1", "N/A",…
## $ PHQ9_risk <chr> "N/A", "Medium", "N/A", "Low / Medium", "N…
## $ PHQ9_score <chr> "N/A", "12", "N/A", "7", "N/A", "9", "N/A"…
## $ PHQ9_acuity <chr> "N/A", "moderate", "N/A", "mild", "N/A", "…
## $ PHQ9_positive <chr> "N/A", "TRUE", "N/A", "TRUE", "N/A", "TRUE…
## $ SDS_days_missed <chr> "0", "N/A", "2", "N/A", "1", "N/A", "2", "…
## $ SDS_work_impact <chr> "2", "N/A", "5", "N/A", "3", "N/A", "9", "…
## $ SDS_home_life_impact <chr> "2", "N/A", "2", "N/A", "1", "N/A", "4", "…
## $ SDS_days_unproductive <chr> "1", "N/A", "4", "N/A", "0", "N/A", "0", "…
## $ SDS_social_life_impact <chr> "7", "N/A", "3", "N/A", "3", "N/A", "7", "…
## $ SDS_risk <chr> "Low", "N/A", "Low", "N/A", "Low", "N/A", …
## $ SDS_positive <chr> "TRUE", "N/A", "TRUE", "N/A", "FALSE", "N/…
## $ first_number_in_member_hash <dbl> 1, 1, 0, 0, 0, 0, 2, 2, 4, 0, 0, 4, 4, 0, …
# rename the first column bc it was blank and was renamed in the load
data <- data %>% rename(row_num = `...1`)
# want to see what distinct values there are in non date/id columns
data_freq <- data %>% dplyr::select(-row_num
, -member_id_hashed
, -assessment_created_at
, -questionnaire_created_at)
apply((data_freq), 2, table)
#looks like N/A is not truly NA going to fix that
data <- data %>%
mutate_all(~na_if(., "N/A"))
colSums(is.na(data))
#just want to make sure there are no blanks in id field
data %>% filter(member_id_hashed == "") %>% summarise(n())
Field names are all set. Missing is now set to missing (NA) and missingess was examined. There was no missing on ID variable and most of the rest made sense based on the PHQ9/SDS split. Note SDS_days_missed (1904) and SDS_days_unproductive (1904) because they have higher missing than the rest of the SDS variables meaning some members just skipped those questions on the SDS questionnaire.
dis_id <- data %>%
distinct(member_id_hashed) %>%
summarise(distinct_id = n())
There are 1166 distinct individuals that used the platform.
mean_id_count <- data %>%
group_by(member_id_hashed) %>%
summarise(id_count = n()) %>%
summarise(round(mean(id_count), 2))
The average number of times that a member interacts with the platform is 2.8 times.
p_base <- data %>%
filter(questionnaire_kind == 'PHQ9') %>%
group_by(member_id_hashed) %>%
mutate(min_date = min(questionnaire_created_at),
p_baseline = if_else(questionnaire_created_at == min_date, 1, 0),
PHQ9_score = as.numeric(PHQ9_score)) %>%
filter(p_baseline == 1) %>%
ungroup()
# quick check of distribution
tab1(p_base$PHQ9_score, main = "Distribution of PHQ9 Baesline Scores", col = "grey")
## p_base$PHQ9_score :
## Frequency Percent Cum. percent
## 0 37 3.2 3.2
## 1 39 3.4 6.6
## 2 86 7.4 14.0
## 3 90 7.8 21.7
## 4 90 7.8 29.5
## 5 99 8.5 38.0
## 6 94 8.1 46.1
## 7 90 7.8 53.9
## 8 89 7.7 61.6
## 9 63 5.4 67.0
## 10 51 4.4 71.4
## 11 61 5.3 76.6
## 12 39 3.4 80.0
## 13 45 3.9 83.9
## 14 37 3.2 87.1
## 15 24 2.1 89.1
## 16 38 3.3 92.4
## 17 14 1.2 93.6
## 18 15 1.3 94.9
## 19 13 1.1 96.0
## 20 17 1.5 97.5
## 21 3 0.3 97.8
## 22 4 0.3 98.1
## 23 12 1.0 99.1
## 24 3 0.3 99.4
## 25 4 0.3 99.7
## 26 3 0.3 100.0
## Total 1160 100.0 100.0
Assuming the baseline total PHQ9 score is the PHQ9 score from the minimum questionnaire date for a member. There is a positive skewed distribution. A quick visualization was provided with the tab1 function, but could be made in ggplot too (code provided for basic histogram in ggplot below).
ggplot_example <- ggplot(p_base, aes(x = PHQ9_score)) +
geom_histogram(binwidth = 1) +
labs(title = "Distribution of PHQ9 Baesline Scores", x = "PHQ9 score", y = "Count") +
theme_classic()
mode <- function(x) {
which.max(tabulate(x))
}
# most summary statistics
stat.desc(p_base$PHQ9_score)
## nbr.val nbr.null nbr.na min max range
## 1160.0000000 37.0000000 0.0000000 0.0000000 26.0000000 26.0000000
## sum median mean SE.mean CI.mean.0.95 var
## 9301.0000000 7.0000000 8.0181034 0.1577786 0.3095637 28.8771526
## std.dev coef.var
## 5.3737466 0.6702017
# want mode and quantiles
p_base %>%
summarise(
mode = mode(PHQ9_score),
quantile_25 = quantile(PHQ9_score, 0.25),
quantile_75 = quantile(PHQ9_score, 0.75)
)
## # A tibble: 1 × 3
## mode quantile_25 quantile_75
## <int> <dbl> <dbl>
## 1 5 4 11
Baseline total PHQ9 scores range from 0 to 26. The mean score is 8.02 and the median score is 7. The majority (68%) falls between 2.65 and 13.39.
p_score <- data %>%
filter(questionnaire_kind == 'PHQ9') %>%
group_by(member_id_hashed) %>%
mutate(min_date = min(questionnaire_created_at),
max_date= max(questionnaire_created_at),
p_baseline = if_else(questionnaire_created_at == min_date, 1, 0),
p_recent = if_else(questionnaire_created_at == max_date, 1, 0),
PHQ9_score = as.numeric(PHQ9_score),
p_type = if_else(p_baseline == 1 , "baseline",
if_else(p_recent == 1, "recent",
"middle"))) %>%
arrange(member_id_hashed) %>%
dplyr::select(member_id_hashed, p_type, PHQ9_score)
p_score_wide <- p_score %>%
filter(p_type %in% c("baseline", "recent")) %>%
dcast(member_id_hashed ~ p_type, value.var = "PHQ9_score") %>%
mutate(p_change = recent - baseline) %>%
filter(!is.na(p_change))
count_p_change <- p_score_wide %>% summarise(count = n())
mean_p_change <- round(mean(p_score_wide$p_change), 2)
print(mean_p_change)
## [1] -0.9
There were 266 members that had a baseline and follow-up total PHQ9 score. For those members with multiple total PHQ9 scores the most recent score was used (this method is used for the rest of the analyses too). The average change in total PHQ9 score for members using the platform was -0.9.
dep_p <- data %>%
filter(questionnaire_kind == 'PHQ9') %>%
group_by(member_id_hashed) %>%
mutate(min_date = min(questionnaire_created_at),
p_baseline = if_else(questionnaire_created_at == min_date, 1, 0),
p_type = if_else(p_baseline == 1 , "baseline", NULL)) %>%
arrange(member_id_hashed) %>%
dplyr::select(member_id_hashed, p_type, PHQ9_positive)
dep_p_wide <- dep_p %>%
filter(p_type %in% c("baseline", "recent")) %>%
dcast(member_id_hashed ~ p_type, value.var = "PHQ9_positive") %>%
rename(dep_p_base = baseline) %>%
filter(dep_p_base == TRUE) %>%
inner_join(p_score_wide, by = 'member_id_hashed')
count_dep_p_change = dep_p_wide %>% summarise(count = n())
mean_dep_p_change = round(mean(dep_p_wide$p_change), 2)
print(mean_dep_p_change)
## [1] -1.44
Assuming that PHQ9_positive being “TRUE” represents members that are depressed, there are 220 members that had depression at baseline and completed a baseline and follow-up PHQ9. The average change in total PHQ9 score for depressed individuals using the platform was -1.44.
s_unprod <- data %>%
filter(questionnaire_kind == 'SDS') %>%
group_by(member_id_hashed) %>%
mutate(min_date = min(questionnaire_created_at),
max_date= max(questionnaire_created_at),
s_baseline = if_else(questionnaire_created_at == min_date, 1, 0),
s_recent = if_else(questionnaire_created_at == max_date, 1, 0),
SDS_days_unproductive = as.numeric(SDS_days_unproductive),
s_type = if_else(s_baseline == 1 , "baseline",
if_else(s_recent == 1, "recent",
"middle"))) %>%
arrange(member_id_hashed) %>%
dplyr::select(member_id_hashed, s_type, SDS_days_unproductive)
s_unprod_wide <- s_unprod %>%
filter(s_type %in% c("baseline", "recent")) %>%
dcast(member_id_hashed ~ s_type, value.var = "SDS_days_unproductive") %>%
mutate(s_change = recent - baseline) %>%
filter(!is.na(s_change))
count_s_change = s_unprod_wide %>% summarise(count = n())
mean_s_change = round(mean(s_unprod_wide$s_change), 2)
print(mean_s_change)
## [1] -0.11
There were 216 members that had a baseline and follow-up SDS_days_unproductive. For those members with multiple follow-up SDS_days_unproductive scores the most recent score was used (this method is used for the rest of the analyses too). The average change in SDS unproductive days for members using the platform was -0.11.
dep_p_wide_2 <- dep_p %>%
filter(p_type %in% c("baseline", "recent")) %>%
dcast(member_id_hashed ~ p_type, value.var = "PHQ9_positive") %>%
rename(dep_p_base = baseline)
data_comb <- data %>%
dplyr::select(member_id_hashed) %>%
inner_join(dep_p_wide_2, by = 'member_id_hashed') %>%
inner_join(p_score_wide %>% dplyr::select(member_id_hashed, p_change), by = 'member_id_hashed') %>%
inner_join(s_unprod_wide %>% dplyr::select(member_id_hashed, s_change), by = 'member_id_hashed') %>%
distinct()
# tab1(data_comb$p_change)
# tab1(data_comb$s_change)
ggscatter(data_comb, x = "p_change", y = "s_change",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
title = "Relationship between symptomatic and functional improvement in all Spring Health users",
xlab = "PHQ9 Change", ylab = "SDS Days Unproductive Change")
count_data_comb = data_comb %>% summarise(count = n())
cor_p_s <- round(cor(data_comb$p_change, data_comb$s_change),2)
dep_data_comb <- data %>%
dplyr::select(member_id_hashed) %>%
inner_join(dep_p_wide_2, by = 'member_id_hashed') %>%
inner_join(p_score_wide %>% dplyr::select(member_id_hashed, p_change), by = 'member_id_hashed') %>%
inner_join(s_unprod_wide %>% dplyr::select(member_id_hashed, s_change), by = 'member_id_hashed') %>%
filter(dep_p_base == TRUE) %>%
distinct()
# tab1(dep_data_comb$p_change)
# tab1(dep_data_comb$s_change)
ggscatter(dep_data_comb, x = "p_change", y = "s_change",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
title = "Relationship between symptomatic and functional improvement in Spring Health users with depression",
xlab = "PHQ9 Change", ylab = "SDS Days Unproductive Change")
count_dep_data_comb = dep_data_comb %>% summarise(count = n())
dep_cor_p_s <- round(cor(dep_data_comb$p_change, dep_data_comb$s_change), 2)
There were 187 members that had change data for both the total PHQ9 score and the SDS unproductive data. The correlation between the symptom change (total PHQ9 score change) and functional change (SDS unproductive days change) is 0.48. If we filter to just those members with depression at baseline (PHQ9 positive is “TRUE”) there are 159 members with complete change data and the correlation is 0.47. There is not much difference between all the members using Spring Health and just members with depression using Spring Health; both correlations are moderate positive linear relationships that are statistically significant. In general, that means as symptoms improve, function also improves.
Given the above information it does appear that interacting with the Spring Health platform is beneficial. We see about a third of members that interact with the platform improving, a third with no change, and a third worsening. Additionally, the average change in total PHQ9 score and SDS days unproductive are both negative indicating that symptoms and function are improving from baseline to follow-up in general. This relationship is similar among those with depression and all users. To make a stronger conclusion additional evidence is needed. Some follow-up analyses would be to compare symptom and function change over time in those using the Spring Health platform versus a traditional standard of care (i.e. in-person therapy, medication, and/or a combination), preferably with random assignment.
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.2.0 (2022-04-22 ucrt)
## os Windows 10 x64 (build 22000)
## system x86_64, mingw32
## ui RTerm
## language (EN)
## collate English_United States.utf8
## ctype English_United States.utf8
## tz America/New_York
## date 2022-04-26
## pandoc 2.17.1.1 @ C:/Program Files/RStudio/bin/quarto/bin/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date (UTC) lib source
## P abind 1.4-5 2016-07-21 [?] CRAN (R 4.2.0)
## P assertthat 0.2.1 2019-03-21 [?] CRAN (R 4.2.0)
## P backports 1.4.1 2021-12-13 [?] CRAN (R 4.2.0)
## boot 1.3-28 2021-05-03 [2] CRAN (R 4.2.0)
## P brio 1.1.3 2021-11-30 [?] CRAN (R 4.2.0)
## P broom 0.8.0 2022-04-13 [?] CRAN (R 4.2.0)
## P bslib 0.3.1 2021-10-06 [?] CRAN (R 4.2.0)
## P cachem 1.0.6 2021-08-19 [?] CRAN (R 4.2.0)
## P callr 3.7.0 2021-04-20 [?] CRAN (R 4.2.0)
## P car 3.0-12 2021-11-06 [?] CRAN (R 4.2.0)
## P carData 3.0-5 2022-01-06 [?] CRAN (R 4.2.0)
## P cellranger 1.1.0 2016-07-27 [?] CRAN (R 4.2.0)
## P cli 3.2.0 2022-02-14 [?] CRAN (R 4.2.0)
## P colorspace 2.0-3 2022-02-21 [?] CRAN (R 4.2.0)
## P crayon 1.5.1 2022-03-26 [?] CRAN (R 4.2.0)
## P DBI 1.1.2 2021-12-20 [?] CRAN (R 4.2.0)
## P dbplyr 2.1.1 2021-04-06 [?] CRAN (R 4.2.0)
## P desc 1.4.1 2022-03-06 [?] CRAN (R 4.2.0)
## P devtools * 2.4.3 2021-11-30 [?] CRAN (R 4.2.0)
## P digest 0.6.29 2021-12-01 [?] CRAN (R 4.2.0)
## P dplyr * 1.0.8 2022-02-08 [?] CRAN (R 4.2.0)
## P ellipsis 0.3.2 2021-04-29 [?] CRAN (R 4.2.0)
## P epiDisplay * 3.5.0.1 2018-05-10 [?] CRAN (R 4.2.0)
## P evaluate 0.15 2022-02-18 [?] CRAN (R 4.2.0)
## P fansi 1.0.3 2022-03-24 [?] CRAN (R 4.2.0)
## P farver 2.1.0 2021-02-28 [?] CRAN (R 4.2.0)
## P fastmap 1.1.0 2021-01-25 [?] CRAN (R 4.2.0)
## P forcats * 0.5.1 2021-01-27 [?] CRAN (R 4.2.0)
## foreign * 0.8-82 2022-01-16 [2] CRAN (R 4.2.0)
## P fs 1.5.2 2021-12-08 [?] CRAN (R 4.2.0)
## P generics 0.1.2 2022-01-31 [?] CRAN (R 4.2.0)
## P ggplot2 * 3.3.5 2021-06-25 [?] CRAN (R 4.2.0)
## P ggpubr * 0.4.0 2020-06-27 [?] CRAN (R 4.2.0)
## P ggsignif 0.6.3 2021-09-09 [?] CRAN (R 4.2.0)
## P glue 1.6.2 2022-02-24 [?] CRAN (R 4.2.0)
## P gtable 0.3.0 2019-03-25 [?] CRAN (R 4.2.0)
## P haven 2.5.0 2022-04-15 [?] CRAN (R 4.2.0)
## P highr 0.9 2021-04-16 [?] CRAN (R 4.2.0)
## P hms 1.1.1 2021-09-26 [?] CRAN (R 4.2.0)
## P htmltools 0.5.2 2021-08-25 [?] CRAN (R 4.2.0)
## P httr 1.4.2 2020-07-20 [?] CRAN (R 4.2.0)
## P jquerylib 0.1.4 2021-04-26 [?] CRAN (R 4.2.0)
## P jsonlite 1.8.0 2022-02-22 [?] CRAN (R 4.2.0)
## P knitr 1.38 2022-03-25 [?] CRAN (R 4.2.0)
## P labeling 0.4.2 2020-10-20 [?] CRAN (R 4.2.0)
## lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.0)
## P lifecycle 1.0.1 2021-09-24 [?] CRAN (R 4.2.0)
## P lubridate 1.8.0 2021-10-07 [?] CRAN (R 4.2.0)
## P magrittr 2.0.3 2022-03-30 [?] CRAN (R 4.2.0)
## MASS * 7.3-56 2022-03-23 [2] CRAN (R 4.2.0)
## Matrix 1.4-1 2022-03-23 [2] CRAN (R 4.2.0)
## P memoise 2.0.1 2021-11-26 [?] CRAN (R 4.2.0)
## mgcv 1.8-40 2022-03-29 [2] CRAN (R 4.2.0)
## P modelr 0.1.8 2020-05-19 [?] CRAN (R 4.2.0)
## P munsell 0.5.0 2018-06-12 [?] CRAN (R 4.2.0)
## nlme 3.1-157 2022-03-25 [2] CRAN (R 4.2.0)
## nnet * 7.3-17 2022-01-16 [2] CRAN (R 4.2.0)
## P pastecs * 1.3.21 2018-03-15 [?] CRAN (R 4.2.0)
## P pillar 1.7.0 2022-02-01 [?] CRAN (R 4.2.0)
## P pkgbuild 1.3.1 2021-12-20 [?] CRAN (R 4.2.0)
## P pkgconfig 2.0.3 2019-09-22 [?] CRAN (R 4.2.0)
## P pkgload 1.2.4 2021-11-30 [?] CRAN (R 4.2.0)
## P plyr 1.8.7 2022-03-24 [?] CRAN (R 4.2.0)
## P prettyunits 1.1.1 2020-01-24 [?] CRAN (R 4.2.0)
## P processx 3.5.3 2022-03-25 [?] CRAN (R 4.2.0)
## P ps 1.6.0 2021-02-28 [?] CRAN (R 4.2.0)
## P purrr * 0.3.4 2020-04-17 [?] CRAN (R 4.2.0)
## P R6 2.5.1 2021-08-19 [?] CRAN (R 4.2.0)
## P Rcpp 1.0.8.3 2022-03-17 [?] CRAN (R 4.2.0)
## P readr * 2.1.2 2022-01-30 [?] CRAN (R 4.2.0)
## P readxl 1.4.0 2022-03-28 [?] CRAN (R 4.2.0)
## P remotes 2.4.2 2021-11-30 [?] CRAN (R 4.2.0)
## renv 0.15.4 2022-03-03 [1] CRAN (R 4.2.0)
## P reprex 2.0.1 2021-08-05 [?] CRAN (R 4.2.0)
## P reshape2 * 1.4.4 2020-04-09 [?] CRAN (R 4.2.0)
## P rlang 1.0.2 2022-03-04 [?] CRAN (R 4.2.0)
## P rmarkdown 2.13 2022-03-10 [?] CRAN (R 4.2.0)
## P rprojroot 2.0.3 2022-04-02 [?] CRAN (R 4.2.0)
## P rstatix 0.7.0 2021-02-13 [?] CRAN (R 4.2.0)
## P rstudioapi 0.13 2020-11-12 [?] CRAN (R 4.2.0)
## P rvest 1.0.2 2021-10-16 [?] CRAN (R 4.2.0)
## P sass 0.4.1 2022-03-23 [?] CRAN (R 4.2.0)
## P scales 1.2.0 2022-04-13 [?] CRAN (R 4.2.0)
## P sessioninfo 1.2.2 2021-12-06 [?] CRAN (R 4.2.0)
## P stringi 1.7.6 2021-11-29 [?] CRAN (R 4.2.0)
## P stringr * 1.4.0 2019-02-10 [?] CRAN (R 4.2.0)
## survival * 3.3-1 2022-03-03 [2] CRAN (R 4.2.0)
## P testthat 3.1.3 2022-03-29 [?] CRAN (R 4.2.0)
## P tibble * 3.1.6 2021-11-07 [?] CRAN (R 4.2.0)
## P tidyr * 1.2.0 2022-02-01 [?] CRAN (R 4.2.0)
## P tidyselect 1.1.2 2022-02-21 [?] CRAN (R 4.2.0)
## P tidyverse * 1.3.1 2021-04-15 [?] CRAN (R 4.2.0)
## P tzdb 0.3.0 2022-03-28 [?] CRAN (R 4.2.0)
## P usethis * 2.1.5 2021-12-09 [?] CRAN (R 4.2.0)
## P utf8 1.2.2 2021-07-24 [?] CRAN (R 4.2.0)
## P vctrs 0.4.1 2022-04-13 [?] CRAN (R 4.2.0)
## P withr 2.5.0 2022-03-03 [?] CRAN (R 4.2.0)
## P xfun 0.30 2022-03-02 [?] CRAN (R 4.2.0)
## P xml2 1.3.3 2021-11-30 [?] CRAN (R 4.2.0)
## P yaml 2.3.5 2022-02-21 [?] CRAN (R 4.2.0)
##
## [1] C:/Users/caitl/Dropbox/Prof/assessments/spring_health/renv/library/R-4.2/x86_64-w64-mingw32
## [2] C:/Program Files/R/R-4.2.0/library
##
## P ── Loaded and on-disk path mismatch.
##
## ──────────────────────────────────────────────────────────────────────────────