#load necessary packages
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(purrr)
library(tibble)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(fastDummies)
library(corrplot)
## corrplot 0.84 loaded
# Read framingham_umn.csv (first row holds the column names) and wrap the
# result in a data.table in a single step. Under pre-4.0 R defaults,
# read.csv() brings string columns such as gender in as factors.
# The data.table conversion is not needed for a dataset this small; it is
# kept only as a habit for faster operations on larger data.
framingham <- data.table(
  read.csv("framingham_umn.csv", header = TRUE)
)


# Build framingham_full: keep men and women whose systolic BP is strictly
# positive, then derive ShockIndex = heartRate / sysBP.
#
# In R `&` binds tighter than `|`, so the previous condition
#   gender == "Female" | gender == "Male" & sysBP > 0
# was evaluated as Female OR (Male AND sysBP > 0): the blood-pressure guard
# was never applied to women. Passing the two conditions as separate
# filter() arguments ANDs them for every row. filter() also drops rows where
# a condition is NA, so records with a missing sysBP are removed as well.
# NOTE(review): any console output captured further down in this file was
# produced before this fix and may differ slightly after a re-run.
framingham_full <- framingham %>%
  filter(gender %in% c("Female", "Male"), sysBP > 0) %>%
  mutate(ShockIndex = heartRate / sysBP)


# Turn the diabetes indicator into a factor.
framingham_full[["diabetes"]] <- as.factor(framingham_full[["diabetes"]])

# Relabel the factor levels (in their existing sorted order) with readable
# names; not strictly required here, but handy for plots and later analysis.
# Assumes the lower-sorting raw value means "not diabetic" -- TODO confirm.
levels(framingham_full[["diabetes"]]) <- c("Not Diabetic", "Diabetic")

# Append dummy (indicator) columns via fastDummies::dummy_cols(); with no
# column selection it uses its default choice of columns to expand.
framingham_full <- dummy_cols(framingham_full)


# Box-and-whisker plot of ShockIndex, one box per diabetes group.
framingham_full %>%
  plot_ly(
    y = ~ShockIndex,
    color = ~diabetes,
    type = "box",
    jitter = 0.5
  )
## Warning: Ignoring 1 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
paste("here are some summary statistics by groups for both genders")
## [1] "here are some summary statistics by groups for both genders"
# Per-group descriptive statistics of ShockIndex. Every summary column is
# named explicitly (previously four of them received auto-generated names
# such as `median(ShockIndex, na.rm = T)`), and TRUE replaces the
# reassignable shorthand T. The existing `counts` and `mean` columns keep
# their names.
# NOTE(review): the captured tibble printed below predates these column
# names; its header will change on the next run.
stats_by_diabetic_status <- framingham_full %>%
  group_by(diabetes) %>%
  summarise(
    counts = n(),
    mean = mean(ShockIndex, na.rm = TRUE),
    median = median(ShockIndex, na.rm = TRUE),
    sd = sd(ShockIndex, na.rm = TRUE),
    min = min(ShockIndex, na.rm = TRUE),
    max = max(ShockIndex, na.rm = TRUE)
  )
stats_by_diabetic_status
## # A tibble: 2 x 7
##   diabetes counts  mean `median(ShockIn~ `sd(ShockIndex,~ `min(ShockIndex~
##   <fct>     <int> <dbl>            <dbl>            <dbl>            <dbl>
## 1 Not Dia~   4131 0.586            0.579            0.115            0.269
## 2 Diabetic    109 0.562            0.544            0.148            0.289
## # ... with 1 more variable: `max(ShockIndex, na.rm = T)` <dbl>
# Percentage of each diabetes group whose ShockIndex exceeds .9.

# f1: number of patients per group with ShockIndex > .9. Counting with sum()
# instead of filter() + count() keeps a group even when it has zero such
# patients; previously that group vanished from f1, so f1$n / f2$n divided
# misaligned (or recycled) rows. Missing ShockIndex values are not counted.
f1 <- framingham_full %>%
  group_by(diabetes) %>%
  summarise(n = sum(ShockIndex > .9, na.rm = TRUE))

# f2: total number of patients per group (the denominator).
f2 <- framingham_full %>%
  group_by(diabetes) %>%
  count(diabetes)

# Guard the element-wise division: both tables must list the same groups in
# the same order.
stopifnot(identical(as.character(f1$diabetes), as.character(f2$diabetes)))

percentage_by_population <- (f1$n / f2$n) * 100
population_proportion <- cbind(
  as.data.frame(f1),
  as.data.frame(percentage_by_population)
)

# Rename the first two columns for display; the third keeps the name
# "percentage_by_population".
colnames(population_proportion)[1:2] <- c(
  "Is patient diabetic?",
  "# of people with SI > .9"
)

# Print the resulting table.
population_proportion
##   Is patient diabetic? # of people with SI > .9 percentage_by_population
## 1         Not Diabetic                       35                0.8472525
## 2             Diabetic                        4                3.6697248
# Report which group has the larger share of patients with ShockIndex > .9.
# Percentages are looked up by group label rather than by row position, so
# the comparison stays correct if a group is missing or the rows are
# reordered. A missing value is reported instead of crashing `if`.
si_pct_by_group <- setNames(
  population_proportion[["percentage_by_population"]],
  as.character(population_proportion[["Is patient diabetic?"]])
)
diabetic_pct <- unname(si_pct_by_group["Diabetic"])
non_diabetic_pct <- unname(si_pct_by_group["Not Diabetic"])

if (anyNA(c(diabetic_pct, non_diabetic_pct))) {
  print("ShockIndex proportion could not be compared between groups")
} else if (diabetic_pct > non_diabetic_pct) {
  print("ShockIndex proportion is higher in diabetic population")
} else {
  # The previous message claimed the proportion was "low in non-diabetic
  # population", which is the opposite of what reaching this branch means.
  print("ShockIndex proportion is not higher in diabetic population")
}
## [1] "ShockIndex proportion is higher in diabetic population"