#load necessary packages
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(purrr)
library(tibble)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(corrplot)
## corrplot 0.84 loaded
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(fastDummies)
# Read framingham_umn.csv (the first row holds the column names) and keep the
# result as a data.table called framingham. The data.table conversion is a
# speed-oriented habit rather than a requirement of this analysis.
framingham <- read.csv("framingham_umn.csv", header = TRUE) %>%
  data.table()
# Build framingham_women: female participants whose systolic blood pressure
# was measured (sysBP > 0), with two derived columns:
#   ShockIndex - heartRate / sysBP, rounded to 2 decimal places
#   cholRisk   - totChol binned into (0,200], (200,240], (240,600] and
#                labelled Desireable / Borderline / High; totChol values
#                outside those bounds become NA
framingham_women <- framingham %>%
  filter(gender == "Female" & sysBP > 0) %>%
  mutate(
    ShockIndex = round(heartRate / sysBP, 2),
    # Refer to totChol through the pipeline's own data mask rather than via
    # framingham_women$totChol, so the bins are always computed from the rows
    # that are actually flowing through this mutate().
    cholRisk = cut(
      totChol,
      breaks = c(0, 200, 240, 600),
      labels = c("Desireable", "Borderline", "High")
    )
  )
# Drop every row that has a missing value in ANY column (not only cholRisk),
# so the later summaries, correlations and PCA run on complete cases.
framingham_women <- na.omit(framingham_women)
# Interactive plotly box plot: distribution of ShockIndex within each
# cholesterol-risk group, one colour per group.
framingham_women %>%
  plot_ly(
    x = ~cholRisk,
    y = ~ShockIndex,
    color = ~cholRisk,
    type = "box",
    jitter = 0.5
  )
# Append 0/1 dummy columns to framingham_women. dummy_cols() is called without
# select_columns, so it encodes every character/factor column it finds (per the
# fastDummies documentation) - i.e. gender as well as cholRisk, yielding
# columns such as gender_Female and cholRisk_High.
framingham_women <- dummy_cols(framingham_women)
print("here are some summary statistics by groups - gender is women")
## [1] "here are some summary statistics by groups - gender is women"
# ShockIndex summary per cholesterol-risk group: row count, mean, median,
# standard deviation, minimum and maximum. The unnamed expressions keep their
# call text (e.g. `median(ShockIndex)`) as the column name.
stats_by_cholRisk_women <- framingham_women %>%
  group_by(cholRisk) %>%
  summarise(
    counts = n(),
    mean = mean(ShockIndex),
    median(ShockIndex),
    sd(ShockIndex),
    min(ShockIndex),
    max(ShockIndex)
  )
stats_by_cholRisk_women
## # A tibble: 3 x 7
## cholRisk counts mean `median(ShockIn~ `sd(ShockIndex)` `min(ShockIndex~
## <fct> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Desirea~ 430 0.624 0.62 0.120 0.3
## 2 Borderl~ 667 0.599 0.59 0.123 0.27
## 3 High 938 0.572 0.56 0.121 0.28
## # ... with 1 more variable: `max(ShockIndex)` <dbl>
# Correlation matrix of the numeric variables for the women-only subset.
paste("Here is a correlation matrix")
## [1] "Here is a correlation matrix"
# gender and cholRisk are categorical, and the gender dummies are constant in
# a women-only subset, so none of them can be correlated. Dropping by name via
# setdiff() tolerates a column that was never created (NOTE(review):
# gender_Male presumably only exists when gender carries a "Male" level -
# confirm against dummy_cols()) instead of raising an error.
cor_excluded <- c("gender", "cholRisk", "gender_Female", "gender_Male")
cor_columns <- setdiff(names(framingham_women), cor_excluded)
# as.data.frame() guarantees plain data.frame column selection even if the
# table still carries the data.table class, where a bare numeric index such as
# x[1:19] would select rows rather than columns.
framingham_w_cor <- as.data.frame(framingham_women)[, cor_columns, drop = FALSE]
framingham_w_cor <- na.omit(framingham_w_cor)
# Correlate every remaining column (no hard-coded column count), order the
# variables by hierarchical clustering and outline 3 clusters on the plot.
framingham_corrplot <- corrplot(
  cor(framingham_w_cor),
  order = "hclust",
  addrect = 3
)

framingham_corrplot
## prevalentStroke age totChol cholRisk_High
## prevalentStroke 1.0000000000 0.047271061 0.02831146 0.03178750
## age 0.0472710606 1.000000000 0.42200327 0.36894212
## totChol 0.0283114584 0.422003268 1.00000000 0.78869271
## cholRisk_High 0.0317874971 0.368942120 0.78869271 1.00000000
## diabetes 0.0287117996 0.106839674 0.08626602 0.04725239
## glucose 0.0404337882 0.136308106 0.06063831 0.03516023
## TenYearCHD 0.0493839063 0.237554260 0.10971456 0.07737977
## BPMeds 0.1183429594 0.169410031 0.12215901 0.10293668
## BMI 0.0572298600 0.231135843 0.13919912 0.11645236
## prevalentHyp 0.0730280736 0.416002309 0.22365110 0.17936819
## sysBP 0.0668770881 0.480582225 0.27745192 0.23516959
## diaBP 0.0616367208 0.316955889 0.20935346 0.18233672
## education -0.0569454566 -0.168323377 -0.02850528 -0.01464851
## cholRisk_Desireable -0.0398647818 -0.367560380 -0.65494392 -0.47862438
## currentSmoker -0.0362646259 -0.266858418 -0.09492085 -0.07139319
## cigsPerDay -0.0338139057 -0.236726214 -0.07521520 -0.05074739
## cholRisk_Borderline 0.0009137857 -0.072128755 -0.26794210 -0.64568095
## heartRate -0.0220180177 0.003926093 0.06773638 0.06310324
## ShockIndex -0.0670887415 -0.384817564 -0.18271531 -0.14994294
## diabetes glucose TenYearCHD BPMeds
## prevalentStroke 0.02871180 0.04043379 0.049383906 0.11834296
## age 0.10683967 0.13630811 0.237554260 0.16941003
## totChol 0.08626602 0.06063831 0.109714557 0.12215901
## cholRisk_High 0.04725239 0.03516023 0.077379768 0.10293668
## diabetes 1.00000000 0.60697655 0.083649736 0.08261234
## glucose 0.60697655 1.00000000 0.125438993 0.09471318
## TenYearCHD 0.08364974 0.12543899 1.000000000 0.11241604
## BPMeds 0.08261234 0.09471318 0.112416040 1.00000000
## BMI 0.11320601 0.11615214 0.115505774 0.14447921
## prevalentHyp 0.11637108 0.13710081 0.210953099 0.29711463
## sysBP 0.13158111 0.17867382 0.233752110 0.28160716
## diaBP 0.08713862 0.10558584 0.156400745 0.22754043
## education -0.06565254 -0.04360133 -0.088393335 -0.02438532
## cholRisk_Desireable -0.04448309 -0.05911615 -0.065365195 -0.06571680
## currentSmoker -0.02087924 -0.04479386 -0.034458593 -0.03646096
## cigsPerDay -0.01327749 -0.05569514 -0.001415002 -0.02711296
## cholRisk_Borderline -0.01149244 0.01407493 -0.025324815 -0.05215885
## heartRate 0.06559390 0.10205260 0.009384984 -0.01503976
## ShockIndex -0.03705703 -0.05586871 -0.158134890 -0.21123926
## BMI prevalentHyp sysBP diaBP
## prevalentStroke 0.05722986 0.07302807 0.06687709 0.06163672
## age 0.23113584 0.41600231 0.48058223 0.31695589
## totChol 0.13919912 0.22365110 0.27745192 0.20935346
## cholRisk_High 0.11645236 0.17936819 0.23516959 0.18233672
## diabetes 0.11320601 0.11637108 0.13158111 0.08713862
## glucose 0.11615214 0.13710081 0.17867382 0.10558584
## TenYearCHD 0.11550577 0.21095310 0.23375211 0.15640074
## BPMeds 0.14447921 0.29711463 0.28160716 0.22754043
## BMI 1.00000000 0.35649261 0.38617384 0.41946995
## prevalentHyp 0.35649261 1.00000000 0.72508873 0.62709752
## sysBP 0.38617384 0.72508873 1.00000000 0.80389997
## diaBP 0.41946995 0.62709752 0.80389997 1.00000000
## education -0.21872984 -0.12231564 -0.15379215 -0.09980944
## cholRisk_Desireable -0.12386397 -0.15537673 -0.20630282 -0.15946387
## currentSmoker -0.18997310 -0.16343673 -0.17624109 -0.17296472
## cigsPerDay -0.15499772 -0.13138688 -0.13193534 -0.12241365
## cholRisk_Borderline -0.01594150 -0.05534773 -0.07031526 -0.05494556
## heartRate 0.07634391 0.12848700 0.16148501 0.17571034
## ShockIndex -0.25332701 -0.47376153 -0.65335531 -0.50391770
## education cholRisk_Desireable currentSmoker cigsPerDay
## prevalentStroke -0.056945457 -0.03986478 -0.03626463 -0.033813906
## age -0.168323377 -0.36756038 -0.26685842 -0.236726214
## totChol -0.028505284 -0.65494392 -0.09492085 -0.075215200
## cholRisk_High -0.014648509 -0.47862438 -0.07139319 -0.050747388
## diabetes -0.065652545 -0.04448309 -0.02087924 -0.013277494
## glucose -0.043601333 -0.05911615 -0.04479386 -0.055695140
## TenYearCHD -0.088393335 -0.06536519 -0.03445859 -0.001415002
## BPMeds -0.024385315 -0.06571680 -0.03646096 -0.027112961
## BMI -0.218729842 -0.12386397 -0.18997310 -0.154997720
## prevalentHyp -0.122315636 -0.15537673 -0.16343673 -0.131386880
## sysBP -0.153792145 -0.20630282 -0.17624109 -0.131935343
## diaBP -0.099809442 -0.15946387 -0.17296472 -0.122413648
## education 1.000000000 0.02794811 0.09980788 0.062618851
## cholRisk_Desireable 0.027948106 1.00000000 0.09906557 0.079372458
## currentSmoker 0.099807880 0.09906557 1.00000000 0.775700162
## cigsPerDay 0.062618851 0.07937246 0.77570016 1.000000000
## cholRisk_Borderline -0.008750467 -0.36142366 -0.01034153 -0.015139109
## heartRate -0.060067781 -0.05836912 0.03033379 0.059242156
## ShockIndex 0.076445975 0.13435370 0.17474695 0.158543473
## cholRisk_Borderline heartRate ShockIndex
## prevalentStroke 0.0009137857 -0.022018018 -0.06708874
## age -0.0721287545 0.003926093 -0.38481756
## totChol -0.2679421045 0.067736381 -0.18271531
## cholRisk_High -0.6456809506 0.063103241 -0.14994294
## diabetes -0.0114924427 0.065593896 -0.03705703
## glucose 0.0140749260 0.102052595 -0.05586871
## TenYearCHD -0.0253248147 0.009384984 -0.15813489
## BPMeds -0.0521588538 -0.015039760 -0.21123926
## BMI -0.0159415002 0.076343912 -0.25332701
## prevalentHyp -0.0553477329 0.128486995 -0.47376153
## sysBP -0.0703152625 0.161485012 -0.65335531
## diaBP -0.0549455642 0.175710343 -0.50391770
## education -0.0087504670 -0.060067781 0.07644597
## cholRisk_Desireable -0.3614236585 -0.058369121 0.13435370
## currentSmoker -0.0103415252 0.030333790 0.17474695
## cigsPerDay -0.0151391092 0.059242156 0.15854347
## cholRisk_Borderline 1.0000000000 -0.016248476 0.04238354
## heartRate -0.0162484757 1.000000000 0.61403629
## ShockIndex 0.0423835443 0.614036286 1.00000000
# Principal component analysis of the correlation-input table. Despite the
# variable name, the result is a prcomp fit (component standard deviations and
# a rotation/loadings matrix), not a covariance matrix.
# scale. = FALSE runs the PCA on the unscaled variables, so wide-range columns
# such as totChol dominate the leading components. The three cholRisk_* dummy
# columns are all included and always sum to 1, which is why the final
# component has (numerically) zero standard deviation.
framingham_cov <- prcomp(framingham_w_cor, scale. = FALSE)
framingham_cov
## Standard deviations (1, .., p=19):
## [1] 4.718425e+01 2.624069e+01 2.213533e+01 1.199922e+01 8.796434e+00
## [6] 7.273001e+00 6.075133e+00 3.996306e+00 9.295763e-01 5.528329e-01
## [11] 3.250832e-01 3.066689e-01 3.025819e-01 2.877053e-01 1.805651e-01
## [16] 1.234282e-01 7.564272e-02 2.428989e-02 4.388533e-16
##
## Rotation (n x k) = (19 x 19):
## PC1 PC2 PC3 PC4
## age 8.674046e-02 -0.0997915193 -5.094865e-02 1.301779e-01
## education -1.019532e-03 0.0050738650 2.565178e-03 9.823412e-04
## currentSmoker -1.244876e-03 0.0027411969 1.632525e-03 -6.809291e-03
## cigsPerDay -1.781211e-02 0.0414011451 1.470585e-02 -1.561741e-01
## BPMeds 6.316784e-04 -0.0016490659 -7.196794e-04 1.203241e-03
## prevalentStroke 6.000928e-05 -0.0001867943 -1.902977e-05 2.617662e-04
## prevalentHyp 3.064210e-03 -0.0101453299 -6.896630e-03 1.024473e-03
## diabetes 3.844177e-04 -0.0022578917 3.264076e-03 1.350865e-04
## totChol 9.698908e-01 0.2203660222 8.411057e-02 -6.400007e-03
## sysBP 2.021298e-01 -0.7175159002 -4.959415e-01 7.894860e-02
## diaBP 8.154143e-02 -0.3144457430 -2.518229e-01 -3.322208e-02
## BMI 1.820917e-02 -0.0589988431 -3.101059e-02 7.188037e-03
## heartRate 2.403160e-02 -0.0922017463 -7.888157e-03 -9.744428e-01
## glucose 5.417120e-02 -0.5602852725 8.244056e-01 3.981384e-02
## TenYearCHD 9.664929e-04 -0.0025824950 -3.499593e-04 1.424665e-03
## ShockIndex -6.660097e-04 0.0021109825 1.929873e-03 -7.619392e-03
## cholRisk_Desireable -5.663501e-03 -0.0007550380 -4.588481e-04 -6.061893e-05
## cholRisk_Borderline -2.622644e-03 -0.0009623829 2.619210e-04 3.701587e-04
## cholRisk_High 8.286145e-03 0.0017174208 1.969271e-04 -3.095398e-04
## PC5 PC6 PC7 PC8
## age -0.3191077038 -6.433806e-01 0.6661180664 0.0560629498
## education 0.0088495511 1.264661e-02 -0.0084037789 0.0442116907
## currentSmoker 0.0401102307 -9.928353e-03 0.0051891807 0.0018153186
## cigsPerDay 0.9202010420 -2.567127e-01 0.2375268878 -0.0500844657
## BPMeds 0.0006885707 -5.294956e-04 0.0001447087 -0.0016228746
## prevalentStroke -0.0001219420 1.316457e-04 0.0001299182 -0.0004794422
## prevalentHyp -0.0014974080 -1.928651e-03 0.0030323609 -0.0058919666
## diabetes 0.0005262351 1.144780e-04 0.0004471211 -0.0013900140
## totChol 0.0265232272 4.243109e-02 -0.0319798033 0.0003293839
## sysBP 0.0872783993 -2.273338e-01 -0.3639636097 -0.0139419294
## diaBP 0.0581316743 6.698805e-01 0.5900603365 0.1719059667
## BMI -0.0557887238 9.775654e-02 0.1341730166 -0.9810520129
## heartRate -0.1847469198 -8.427752e-02 0.0027395013 0.0017699201
## glucose 0.0274381683 3.118597e-02 0.0065887817 0.0113750716
## TenYearCHD 0.0004401270 -5.877961e-03 0.0033748368 -0.0023425865
## ShockIndex -0.0015302237 8.681751e-05 0.0010040682 0.0001589949
## cholRisk_Desireable 0.0022398216 2.622300e-03 -0.0045789819 0.0011327036
## cholRisk_Borderline -0.0023750152 -1.270594e-03 0.0020307342 -0.0015282153
## cholRisk_High 0.0001351937 -1.351706e-03 0.0025482477 0.0003955116
## PC9 PC10 PC11 PC12
## age -0.0147846516 -3.244667e-03 0.0059664713 3.008292e-03
## education -0.9984906060 6.466321e-03 -0.0089386912 -1.953271e-02
## currentSmoker -0.0171929698 -7.218615e-03 0.0213889107 4.973597e-01
## cigsPerDay 0.0015112184 1.965599e-03 0.0002394722 -1.890417e-02
## BPMeds -0.0054113542 -9.275694e-03 -0.0992924274 5.977691e-02
## prevalentStroke 0.0035689203 7.435360e-04 -0.0091012237 2.396714e-05
## prevalentHyp -0.0045974294 -5.124317e-03 -0.6879634699 5.894624e-01
## diabetes 0.0054337840 -2.165819e-03 -0.0123304333 8.146940e-03
## totChol 0.0015137508 3.564197e-03 -0.0028235683 5.358101e-04
## sysBP -0.0046836752 8.170426e-04 0.0093721176 -5.114290e-03
## diaBP 0.0092904789 -2.602045e-06 0.0020490593 -2.289570e-03
## BMI -0.0442718239 -1.875249e-03 0.0059517426 -1.273683e-03
## heartRate -0.0041092824 -2.380868e-04 -0.0004536509 -5.765511e-04
## glucose 0.0003058157 -6.182580e-04 0.0008980041 4.357802e-04
## TenYearCHD 0.0134699619 -4.189020e-03 -0.6587350035 -6.309023e-01
## ShockIndex -0.0000167660 -9.719861e-04 -0.0032804829 -4.421850e-03
## cholRisk_Desireable 0.0045456110 -4.084101e-01 -0.1994867615 3.458444e-02
## cholRisk_Borderline 0.0050555693 8.163897e-01 -0.0057741594 3.858230e-03
## cholRisk_High -0.0096011803 -4.079796e-01 0.2052609209 -3.844267e-02
## PC13 PC14 PC15 PC16
## age -0.0035523994 8.030830e-03 -0.0000151164 1.273324e-04
## education 0.0077688256 9.459553e-03 0.0046855487 -5.369036e-03
## currentSmoker -0.8608494648 -9.385508e-02 0.0115673633 -4.348676e-03
## cigsPerDay 0.0371000284 4.755848e-03 0.0001201743 7.589042e-04
## BPMeds 0.0211132579 -2.101509e-02 -0.9912107164 2.092140e-02
## prevalentStroke 0.0017974761 -1.227442e-02 -0.0485518447 1.365465e-03
## prevalentHyp 0.3479334606 -2.102204e-01 0.1167467737 8.291734e-03
## diabetes 0.0066086881 2.350250e-02 -0.0195256477 -9.992325e-01
## totChol -0.0007568618 9.181397e-03 0.0002398874 3.985354e-04
## sysBP -0.0025407475 2.066571e-03 0.0005438257 -2.602575e-04
## diaBP -0.0040487969 1.411184e-03 -0.0003015908 6.027177e-05
## BMI -0.0053165885 2.936667e-03 0.0011184195 1.134761e-03
## heartRate -0.0005765682 6.929313e-04 -0.0009922439 8.986183e-05
## glucose 0.0006266708 5.438616e-06 0.0004420895 3.980740e-03
## TenYearCHD -0.3594486749 -1.945488e-01 0.0247172579 -4.249116e-03
## ShockIndex -0.0059892757 4.462788e-04 -0.0139440684 -1.666774e-02
## cholRisk_Desireable -0.0550895647 6.747077e-01 0.0104320992 1.897276e-02
## cholRisk_Borderline -0.0048166190 -2.031170e-03 -0.0069046809 -1.591680e-03
## cholRisk_High 0.0599061837 -6.726765e-01 -0.0035274182 -1.738108e-02
## PC17 PC18 PC19
## age -1.407279e-04 -2.552158e-04 0.000000e+00
## education -3.842388e-03 9.824891e-05 -7.860708e-18
## currentSmoker -1.224359e-03 2.767286e-03 1.377298e-16
## cigsPerDay -1.335164e-04 -3.599033e-05 1.071826e-17
## BPMeds 4.938651e-02 1.345735e-02 4.608343e-16
## prevalentStroke -9.986942e-01 -5.777285e-04 1.022157e-15
## prevalentHyp 3.817116e-03 -4.325335e-03 -9.618130e-17
## diabetes -5.722338e-04 1.690869e-02 2.999867e-16
## totChol -7.967674e-05 -3.670507e-05 1.334309e-17
## sysBP -6.105790e-05 -3.726048e-03 -3.871008e-18
## diaBP 1.400155e-04 -3.305531e-04 -1.445306e-17
## BMI 2.104820e-04 -1.911755e-04 1.909091e-18
## heartRate -2.018881e-04 7.499696e-03 1.433718e-17
## glucose 7.642903e-05 -4.250522e-05 -8.562676e-19
## TenYearCHD 6.567471e-03 6.741185e-03 5.134166e-17
## ShockIndex 1.243939e-03 -9.996944e-01 -2.612304e-15
## cholRisk_Desireable -7.343772e-03 1.053058e-03 5.773503e-01
## cholRisk_Borderline 1.029984e-03 -6.371653e-04 5.773503e-01
## cholRisk_High 6.313788e-03 -4.158924e-04 5.773503e-01
#completed