#load necessary packages
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(purrr)
library(tibble)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(corrplot)
## corrplot 0.84 loaded
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(fastDummies)

# Load the Framingham data from framingham_umn.csv into a data frame called
# `framingham`.
framingham <- read.csv("framingham_umn.csv", header = TRUE)

# Convert to a data.table (not needed for a data set of this size).
framingham <- data.table(framingham)

# Keep only women whose systolic blood pressure was recorded and is positive.
# filter() also drops rows where a condition evaluates to NA.
framingham_women <- framingham %>%
  filter(gender == "Female", sysBP > 0)

# Add the derived columns in a single step, referring to the piped columns
# directly instead of reaching back into the global `framingham_women`:
#   ShockIndex - heartRate / sysBP, rounded to 2 decimal places
#   cholRisk   - totChol binned into (0,200], (200,240], (240,600]
# The spelling "Desireable" is kept on purpose: the dummy-column names created
# later (e.g. cholRisk_Desireable) are derived from these labels.
# NOTE(review): totChol values above 600 fall outside the breaks, become NA and
# are removed by na.omit() below - confirm that this cut-off is intended.
framingham_women <- framingham_women %>%
  mutate(
    ShockIndex = round(heartRate / sysBP, 2),
    cholRisk = cut(
      totChol,
      breaks = c(0, 200, 240, 600),
      labels = c("Desireable", "Borderline", "High")
    )
  )

# Drop every row that has an NA in any column (not only in cholRisk).
framingham_women <- na.omit(framingham_women)

# Interactive plotly box plot of ShockIndex for each cholesterol risk group.
plot_ly(
  data = framingham_women,
  x = ~cholRisk,
  y = ~ShockIndex,
  color = ~cholRisk,
  type = "box",
  jitter = 0.5
)
# Add 0/1 dummy columns. No columns are named explicitly, so dummy_cols()
# chooses them itself; the later code relies on the resulting cholRisk_* and
# gender_* columns.
framingham_women <- dummy_cols(framingham_women)

paste("here are some summary statistics by groups - gender is women")
## [1] "here are some summary statistics by groups - gender is women"
# Per-cholRisk summary of ShockIndex: group size, mean, median, standard
# deviation, minimum and maximum.
# NOTE(review): the last four summaries are unnamed, so their columns are
# named after the expression text (e.g. `median(ShockIndex)`).
stats_by_cholRisk_women <- framingham_women %>%
  group_by(cholRisk) %>%
  summarise(
    counts = n(),
    mean = mean(ShockIndex),
    median(ShockIndex),
    sd(ShockIndex),
    min(ShockIndex),
    max(ShockIndex)
  )
stats_by_cholRisk_women
## # A tibble: 3 x 7
##   cholRisk counts  mean `median(ShockIn~ `sd(ShockIndex)` `min(ShockIndex~
##   <fct>     <int> <dbl>            <dbl>            <dbl>            <dbl>
## 1 Desirea~    430 0.624             0.62            0.120             0.3 
## 2 Borderl~    667 0.599             0.59            0.123             0.27
## 3 High        938 0.572             0.56            0.121             0.28
## # ... with 1 more variable: `max(ShockIndex)` <dbl>
# Correlation matrix of the numeric columns. gender and cholRisk are removed
# because they are categorical, and the gender_* dummies because the data only
# contains women.
paste("Here is a correlation matrix")
## [1] "Here is a correlation matrix"
# Work on a plain data.frame and drop columns by name:
#  - setdiff() ignores names that are absent, so a missing gender_Male dummy
#    (e.g. when gender is read as character rather than factor) is not an error;
#  - a plain data.frame keeps column semantics, whereas on a data.table an
#    index such as [1:19] would select rows.
framingham_w_cor <- as.data.frame(framingham_women)
framingham_w_cor <- framingham_w_cor[
  ,
  setdiff(
    names(framingham_w_cor),
    c("gender", "cholRisk", "gender_Female", "gender_Male")
  ),
  drop = FALSE
]
framingham_w_cor <- na.omit(framingham_w_cor)

# Correlate every remaining column instead of a hard-coded 1:19 range, so the
# call stays correct if columns are added or removed upstream.
# NOTE(review): `insig = "blank"` is passed without a p-value matrix (p.mat);
# it presumably has no effect here - confirm against the corrplot docs.
framingham_corrplot <- corrplot(
  cor(framingham_w_cor),
  order = "hclust",
  addrect = 3,
  insig = "blank"
)

# Print the value returned by corrplot().
framingham_corrplot
##                     prevalentStroke          age     totChol cholRisk_High
## prevalentStroke        1.0000000000  0.047271061  0.02831146    0.03178750
## age                    0.0472710606  1.000000000  0.42200327    0.36894212
## totChol                0.0283114584  0.422003268  1.00000000    0.78869271
## cholRisk_High          0.0317874971  0.368942120  0.78869271    1.00000000
## diabetes               0.0287117996  0.106839674  0.08626602    0.04725239
## glucose                0.0404337882  0.136308106  0.06063831    0.03516023
## TenYearCHD             0.0493839063  0.237554260  0.10971456    0.07737977
## BPMeds                 0.1183429594  0.169410031  0.12215901    0.10293668
## BMI                    0.0572298600  0.231135843  0.13919912    0.11645236
## prevalentHyp           0.0730280736  0.416002309  0.22365110    0.17936819
## sysBP                  0.0668770881  0.480582225  0.27745192    0.23516959
## diaBP                  0.0616367208  0.316955889  0.20935346    0.18233672
## education             -0.0569454566 -0.168323377 -0.02850528   -0.01464851
## cholRisk_Desireable   -0.0398647818 -0.367560380 -0.65494392   -0.47862438
## currentSmoker         -0.0362646259 -0.266858418 -0.09492085   -0.07139319
## cigsPerDay            -0.0338139057 -0.236726214 -0.07521520   -0.05074739
## cholRisk_Borderline    0.0009137857 -0.072128755 -0.26794210   -0.64568095
## heartRate             -0.0220180177  0.003926093  0.06773638    0.06310324
## ShockIndex            -0.0670887415 -0.384817564 -0.18271531   -0.14994294
##                        diabetes     glucose   TenYearCHD      BPMeds
## prevalentStroke      0.02871180  0.04043379  0.049383906  0.11834296
## age                  0.10683967  0.13630811  0.237554260  0.16941003
## totChol              0.08626602  0.06063831  0.109714557  0.12215901
## cholRisk_High        0.04725239  0.03516023  0.077379768  0.10293668
## diabetes             1.00000000  0.60697655  0.083649736  0.08261234
## glucose              0.60697655  1.00000000  0.125438993  0.09471318
## TenYearCHD           0.08364974  0.12543899  1.000000000  0.11241604
## BPMeds               0.08261234  0.09471318  0.112416040  1.00000000
## BMI                  0.11320601  0.11615214  0.115505774  0.14447921
## prevalentHyp         0.11637108  0.13710081  0.210953099  0.29711463
## sysBP                0.13158111  0.17867382  0.233752110  0.28160716
## diaBP                0.08713862  0.10558584  0.156400745  0.22754043
## education           -0.06565254 -0.04360133 -0.088393335 -0.02438532
## cholRisk_Desireable -0.04448309 -0.05911615 -0.065365195 -0.06571680
## currentSmoker       -0.02087924 -0.04479386 -0.034458593 -0.03646096
## cigsPerDay          -0.01327749 -0.05569514 -0.001415002 -0.02711296
## cholRisk_Borderline -0.01149244  0.01407493 -0.025324815 -0.05215885
## heartRate            0.06559390  0.10205260  0.009384984 -0.01503976
## ShockIndex          -0.03705703 -0.05586871 -0.158134890 -0.21123926
##                             BMI prevalentHyp       sysBP       diaBP
## prevalentStroke      0.05722986   0.07302807  0.06687709  0.06163672
## age                  0.23113584   0.41600231  0.48058223  0.31695589
## totChol              0.13919912   0.22365110  0.27745192  0.20935346
## cholRisk_High        0.11645236   0.17936819  0.23516959  0.18233672
## diabetes             0.11320601   0.11637108  0.13158111  0.08713862
## glucose              0.11615214   0.13710081  0.17867382  0.10558584
## TenYearCHD           0.11550577   0.21095310  0.23375211  0.15640074
## BPMeds               0.14447921   0.29711463  0.28160716  0.22754043
## BMI                  1.00000000   0.35649261  0.38617384  0.41946995
## prevalentHyp         0.35649261   1.00000000  0.72508873  0.62709752
## sysBP                0.38617384   0.72508873  1.00000000  0.80389997
## diaBP                0.41946995   0.62709752  0.80389997  1.00000000
## education           -0.21872984  -0.12231564 -0.15379215 -0.09980944
## cholRisk_Desireable -0.12386397  -0.15537673 -0.20630282 -0.15946387
## currentSmoker       -0.18997310  -0.16343673 -0.17624109 -0.17296472
## cigsPerDay          -0.15499772  -0.13138688 -0.13193534 -0.12241365
## cholRisk_Borderline -0.01594150  -0.05534773 -0.07031526 -0.05494556
## heartRate            0.07634391   0.12848700  0.16148501  0.17571034
## ShockIndex          -0.25332701  -0.47376153 -0.65335531 -0.50391770
##                        education cholRisk_Desireable currentSmoker   cigsPerDay
## prevalentStroke     -0.056945457         -0.03986478   -0.03626463 -0.033813906
## age                 -0.168323377         -0.36756038   -0.26685842 -0.236726214
## totChol             -0.028505284         -0.65494392   -0.09492085 -0.075215200
## cholRisk_High       -0.014648509         -0.47862438   -0.07139319 -0.050747388
## diabetes            -0.065652545         -0.04448309   -0.02087924 -0.013277494
## glucose             -0.043601333         -0.05911615   -0.04479386 -0.055695140
## TenYearCHD          -0.088393335         -0.06536519   -0.03445859 -0.001415002
## BPMeds              -0.024385315         -0.06571680   -0.03646096 -0.027112961
## BMI                 -0.218729842         -0.12386397   -0.18997310 -0.154997720
## prevalentHyp        -0.122315636         -0.15537673   -0.16343673 -0.131386880
## sysBP               -0.153792145         -0.20630282   -0.17624109 -0.131935343
## diaBP               -0.099809442         -0.15946387   -0.17296472 -0.122413648
## education            1.000000000          0.02794811    0.09980788  0.062618851
## cholRisk_Desireable  0.027948106          1.00000000    0.09906557  0.079372458
## currentSmoker        0.099807880          0.09906557    1.00000000  0.775700162
## cigsPerDay           0.062618851          0.07937246    0.77570016  1.000000000
## cholRisk_Borderline -0.008750467         -0.36142366   -0.01034153 -0.015139109
## heartRate           -0.060067781         -0.05836912    0.03033379  0.059242156
## ShockIndex           0.076445975          0.13435370    0.17474695  0.158543473
##                     cholRisk_Borderline    heartRate  ShockIndex
## prevalentStroke            0.0009137857 -0.022018018 -0.06708874
## age                       -0.0721287545  0.003926093 -0.38481756
## totChol                   -0.2679421045  0.067736381 -0.18271531
## cholRisk_High             -0.6456809506  0.063103241 -0.14994294
## diabetes                  -0.0114924427  0.065593896 -0.03705703
## glucose                    0.0140749260  0.102052595 -0.05586871
## TenYearCHD                -0.0253248147  0.009384984 -0.15813489
## BPMeds                    -0.0521588538 -0.015039760 -0.21123926
## BMI                       -0.0159415002  0.076343912 -0.25332701
## prevalentHyp              -0.0553477329  0.128486995 -0.47376153
## sysBP                     -0.0703152625  0.161485012 -0.65335531
## diaBP                     -0.0549455642  0.175710343 -0.50391770
## education                 -0.0087504670 -0.060067781  0.07644597
## cholRisk_Desireable       -0.3614236585 -0.058369121  0.13435370
## currentSmoker             -0.0103415252  0.030333790  0.17474695
## cigsPerDay                -0.0151391092  0.059242156  0.15854347
## cholRisk_Borderline        1.0000000000 -0.016248476  0.04238354
## heartRate                 -0.0162484757  1.000000000  0.61403629
## ShockIndex                 0.0423835443  0.614036286  1.00000000
# Principal component analysis of the correlation-input table. This is a PCA,
# not a covariance matrix: with scale. = FALSE the variables keep their
# original units, so the components are driven by the covariance structure and
# large-variance columns dominate the leading components.
# The argument is spelled out as `scale.` (prcomp's formal name) instead of
# relying on partial matching of `scale`.
# NOTE(review): the three cholRisk_* dummies always sum to 1, which is why the
# last component has a standard deviation of ~0; consider dropping one dummy
# and/or using scale. = TRUE if that is not intended.
framingham_cov <- prcomp(framingham_w_cor, scale. = FALSE)
framingham_cov
## Standard deviations (1, .., p=19):
##  [1] 4.718425e+01 2.624069e+01 2.213533e+01 1.199922e+01 8.796434e+00
##  [6] 7.273001e+00 6.075133e+00 3.996306e+00 9.295763e-01 5.528329e-01
## [11] 3.250832e-01 3.066689e-01 3.025819e-01 2.877053e-01 1.805651e-01
## [16] 1.234282e-01 7.564272e-02 2.428989e-02 4.388533e-16
## 
## Rotation (n x k) = (19 x 19):
##                               PC1           PC2           PC3           PC4
## age                  8.674046e-02 -0.0997915193 -5.094865e-02  1.301779e-01
## education           -1.019532e-03  0.0050738650  2.565178e-03  9.823412e-04
## currentSmoker       -1.244876e-03  0.0027411969  1.632525e-03 -6.809291e-03
## cigsPerDay          -1.781211e-02  0.0414011451  1.470585e-02 -1.561741e-01
## BPMeds               6.316784e-04 -0.0016490659 -7.196794e-04  1.203241e-03
## prevalentStroke      6.000928e-05 -0.0001867943 -1.902977e-05  2.617662e-04
## prevalentHyp         3.064210e-03 -0.0101453299 -6.896630e-03  1.024473e-03
## diabetes             3.844177e-04 -0.0022578917  3.264076e-03  1.350865e-04
## totChol              9.698908e-01  0.2203660222  8.411057e-02 -6.400007e-03
## sysBP                2.021298e-01 -0.7175159002 -4.959415e-01  7.894860e-02
## diaBP                8.154143e-02 -0.3144457430 -2.518229e-01 -3.322208e-02
## BMI                  1.820917e-02 -0.0589988431 -3.101059e-02  7.188037e-03
## heartRate            2.403160e-02 -0.0922017463 -7.888157e-03 -9.744428e-01
## glucose              5.417120e-02 -0.5602852725  8.244056e-01  3.981384e-02
## TenYearCHD           9.664929e-04 -0.0025824950 -3.499593e-04  1.424665e-03
## ShockIndex          -6.660097e-04  0.0021109825  1.929873e-03 -7.619392e-03
## cholRisk_Desireable -5.663501e-03 -0.0007550380 -4.588481e-04 -6.061893e-05
## cholRisk_Borderline -2.622644e-03 -0.0009623829  2.619210e-04  3.701587e-04
## cholRisk_High        8.286145e-03  0.0017174208  1.969271e-04 -3.095398e-04
##                               PC5           PC6           PC7           PC8
## age                 -0.3191077038 -6.433806e-01  0.6661180664  0.0560629498
## education            0.0088495511  1.264661e-02 -0.0084037789  0.0442116907
## currentSmoker        0.0401102307 -9.928353e-03  0.0051891807  0.0018153186
## cigsPerDay           0.9202010420 -2.567127e-01  0.2375268878 -0.0500844657
## BPMeds               0.0006885707 -5.294956e-04  0.0001447087 -0.0016228746
## prevalentStroke     -0.0001219420  1.316457e-04  0.0001299182 -0.0004794422
## prevalentHyp        -0.0014974080 -1.928651e-03  0.0030323609 -0.0058919666
## diabetes             0.0005262351  1.144780e-04  0.0004471211 -0.0013900140
## totChol              0.0265232272  4.243109e-02 -0.0319798033  0.0003293839
## sysBP                0.0872783993 -2.273338e-01 -0.3639636097 -0.0139419294
## diaBP                0.0581316743  6.698805e-01  0.5900603365  0.1719059667
## BMI                 -0.0557887238  9.775654e-02  0.1341730166 -0.9810520129
## heartRate           -0.1847469198 -8.427752e-02  0.0027395013  0.0017699201
## glucose              0.0274381683  3.118597e-02  0.0065887817  0.0113750716
## TenYearCHD           0.0004401270 -5.877961e-03  0.0033748368 -0.0023425865
## ShockIndex          -0.0015302237  8.681751e-05  0.0010040682  0.0001589949
## cholRisk_Desireable  0.0022398216  2.622300e-03 -0.0045789819  0.0011327036
## cholRisk_Borderline -0.0023750152 -1.270594e-03  0.0020307342 -0.0015282153
## cholRisk_High        0.0001351937 -1.351706e-03  0.0025482477  0.0003955116
##                               PC9          PC10          PC11          PC12
## age                 -0.0147846516 -3.244667e-03  0.0059664713  3.008292e-03
## education           -0.9984906060  6.466321e-03 -0.0089386912 -1.953271e-02
## currentSmoker       -0.0171929698 -7.218615e-03  0.0213889107  4.973597e-01
## cigsPerDay           0.0015112184  1.965599e-03  0.0002394722 -1.890417e-02
## BPMeds              -0.0054113542 -9.275694e-03 -0.0992924274  5.977691e-02
## prevalentStroke      0.0035689203  7.435360e-04 -0.0091012237  2.396714e-05
## prevalentHyp        -0.0045974294 -5.124317e-03 -0.6879634699  5.894624e-01
## diabetes             0.0054337840 -2.165819e-03 -0.0123304333  8.146940e-03
## totChol              0.0015137508  3.564197e-03 -0.0028235683  5.358101e-04
## sysBP               -0.0046836752  8.170426e-04  0.0093721176 -5.114290e-03
## diaBP                0.0092904789 -2.602045e-06  0.0020490593 -2.289570e-03
## BMI                 -0.0442718239 -1.875249e-03  0.0059517426 -1.273683e-03
## heartRate           -0.0041092824 -2.380868e-04 -0.0004536509 -5.765511e-04
## glucose              0.0003058157 -6.182580e-04  0.0008980041  4.357802e-04
## TenYearCHD           0.0134699619 -4.189020e-03 -0.6587350035 -6.309023e-01
## ShockIndex          -0.0000167660 -9.719861e-04 -0.0032804829 -4.421850e-03
## cholRisk_Desireable  0.0045456110 -4.084101e-01 -0.1994867615  3.458444e-02
## cholRisk_Borderline  0.0050555693  8.163897e-01 -0.0057741594  3.858230e-03
## cholRisk_High       -0.0096011803 -4.079796e-01  0.2052609209 -3.844267e-02
##                              PC13          PC14          PC15          PC16
## age                 -0.0035523994  8.030830e-03 -0.0000151164  1.273324e-04
## education            0.0077688256  9.459553e-03  0.0046855487 -5.369036e-03
## currentSmoker       -0.8608494648 -9.385508e-02  0.0115673633 -4.348676e-03
## cigsPerDay           0.0371000284  4.755848e-03  0.0001201743  7.589042e-04
## BPMeds               0.0211132579 -2.101509e-02 -0.9912107164  2.092140e-02
## prevalentStroke      0.0017974761 -1.227442e-02 -0.0485518447  1.365465e-03
## prevalentHyp         0.3479334606 -2.102204e-01  0.1167467737  8.291734e-03
## diabetes             0.0066086881  2.350250e-02 -0.0195256477 -9.992325e-01
## totChol             -0.0007568618  9.181397e-03  0.0002398874  3.985354e-04
## sysBP               -0.0025407475  2.066571e-03  0.0005438257 -2.602575e-04
## diaBP               -0.0040487969  1.411184e-03 -0.0003015908  6.027177e-05
## BMI                 -0.0053165885  2.936667e-03  0.0011184195  1.134761e-03
## heartRate           -0.0005765682  6.929313e-04 -0.0009922439  8.986183e-05
## glucose              0.0006266708  5.438616e-06  0.0004420895  3.980740e-03
## TenYearCHD          -0.3594486749 -1.945488e-01  0.0247172579 -4.249116e-03
## ShockIndex          -0.0059892757  4.462788e-04 -0.0139440684 -1.666774e-02
## cholRisk_Desireable -0.0550895647  6.747077e-01  0.0104320992  1.897276e-02
## cholRisk_Borderline -0.0048166190 -2.031170e-03 -0.0069046809 -1.591680e-03
## cholRisk_High        0.0599061837 -6.726765e-01 -0.0035274182 -1.738108e-02
##                              PC17          PC18          PC19
## age                 -1.407279e-04 -2.552158e-04  0.000000e+00
## education           -3.842388e-03  9.824891e-05 -7.860708e-18
## currentSmoker       -1.224359e-03  2.767286e-03  1.377298e-16
## cigsPerDay          -1.335164e-04 -3.599033e-05  1.071826e-17
## BPMeds               4.938651e-02  1.345735e-02  4.608343e-16
## prevalentStroke     -9.986942e-01 -5.777285e-04  1.022157e-15
## prevalentHyp         3.817116e-03 -4.325335e-03 -9.618130e-17
## diabetes            -5.722338e-04  1.690869e-02  2.999867e-16
## totChol             -7.967674e-05 -3.670507e-05  1.334309e-17
## sysBP               -6.105790e-05 -3.726048e-03 -3.871008e-18
## diaBP                1.400155e-04 -3.305531e-04 -1.445306e-17
## BMI                  2.104820e-04 -1.911755e-04  1.909091e-18
## heartRate           -2.018881e-04  7.499696e-03  1.433718e-17
## glucose              7.642903e-05 -4.250522e-05 -8.562676e-19
## TenYearCHD           6.567471e-03  6.741185e-03  5.134166e-17
## ShockIndex           1.243939e-03 -9.996944e-01 -2.612304e-15
## cholRisk_Desireable -7.343772e-03  1.053058e-03  5.773503e-01
## cholRisk_Borderline  1.029984e-03 -6.371653e-04  5.773503e-01
## cholRisk_High        6.313788e-03 -4.158924e-04  5.773503e-01
#completed