\(~\)
\(~\)
\(~\)
# Load the diabetes population data set from a serialized RDS file.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this on another machine.
diab_pop <- readRDS(file = 'C:/Users/jkyle/Documents/GitHub/Intro_Jeff_Data_Science/DATA/diab_pop.RDS')
\(~\)
\(~\)
#### Variable in Data - Definition - Data Type
##### seqn - Respondent sequence number - Identifier
##### riagendr - Gender - Categorical
##### ridageyr - Age in years at screening - Continuous / Numerical
##### ridreth1 - Race/Hispanic origin - Categorical
##### dmdeduc2 - Education level - Adults 20+ - Categorical
##### dmdmartl - Marital status - Categorical
##### indhhin2 - Annual household income - Categorical
##### bmxbmi - Body Mass Index (kg/m**2) - Continuous / Numerical
##### diq010 - Doctor diagnosed diabetes - Categorical / Target
##### lbxglu - Fasting Glucose (mg/dL) - Continuous / Numerical
\(~\)
\(~\)
\(~\)
# Install any of the requested packages that are not yet available.
#
# Args:
#   list.of.packages: character vector of package names.
#
# Returns (invisibly): the value of install.packages() when at least one
# package had to be installed; otherwise the "already installed" message(s),
# which are also printed.
install_if_not <- function( list.of.packages ) {
  # Probe each package individually: system.file() returns "" when the package
  # cannot be found. This avoids building the full installed.packages() table
  # just to test for a handful of names.
  is_installed <- vapply(
    list.of.packages,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
  new.packages <- list.of.packages[!is_installed]

  if (length(new.packages) > 0) {
    install.packages(new.packages)
  } else {
    print(paste0("the package '", list.of.packages, "' is already installed"))
  }
}
\(~\)
\(~\)
\(~\)
\(~\)
library('tidyverse')
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 0.8.5
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Keep only complete cases: drop every row containing at least one NA.
diab_pop.no_na_vals <- na.omit(diab_pop)
library('caret')
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
# Fix the RNG seed so the train/test split is identical on every run.
# For a different split each run, seed from the clock instead,
# e.g. set.seed(Sys.time()).
set.seed(8675309)

# createDataPartition() draws the row indices of the training set based on the
# outcome column; here 60% of the rows are requested, returned as a matrix
# (list = FALSE) for a single partition (times = 1).
trainIndex <- createDataPartition(
  y = diab_pop.no_na_vals[["diq010"]],
  times = 1,
  p = 0.6,
  list = FALSE
)

# Selected rows form the training set; all remaining rows form the test set.
dm2.train <- diab_pop.no_na_vals[trainIndex, ]
dm2.test <- diab_pop.no_na_vals[-trainIndex, ]
dummyVars
Often dummy variables (`dummyVars`), or one-hot encodings, of categorical features are required for models:
# Fit a dummy-variable (one-hot) encoder on every column of the training data.
dummyVars.dm2Train <- dummyVars(formula = ~ ., data = dm2.train)

# Encode the training data with that encoder and keep the result as a tibble.
dm2Train_dummies <- predict(dummyVars.dm2Train, newdata = dm2.train) %>%
  as_tibble()
\(~\)
\(~\)
\(~\)
\(~\)
findLinearCombos
For many models, we can find and remove features that are linear combinations of one another:
# Detect sets of columns that are linear combinations of one another. The
# result holds each dependent set ($linearCombos) and the column positions
# suggested for removal ($remove).
comboInformation <- findLinearCombos(x = dm2Train_dummies)
comboInformation
## $linearCombos
## $linearCombos[[1]]
## [1] 9 2 3 5 6 7 8
##
## $linearCombos[[2]]
## [1] 14 2 3 10 11 12 13
##
## $linearCombos[[3]]
## [1] 20 2 3 15 16 17 18 19
##
## $linearCombos[[4]]
## [1] 27
##
## $linearCombos[[5]]
## [1] 29
##
## $linearCombos[[6]]
## [1] 34 2 3 21 22 23 24 25 26 28 30 31 32 33
##
## $linearCombos[[7]]
## [1] 37 2 3 36
##
##
## $remove
## [1] 9 14 20 27 29 34 37
# Drop the columns that findLinearCombos() flagged as linearly dependent.
# Guard the empty case: when nothing is flagged, `-comboInformation$remove` is
# a zero-length index, which would select zero columns instead of keeping all.
dm2Train_dummies_independent <- if (length(comboInformation$remove) > 0) {
  dm2Train_dummies[, -comboInformation$remove]
} else {
  dm2Train_dummies
}
\(~\)
\(~\)
\(~\)
\(~\)
findCorrelation
Similarly, for many models, we will want to find and remove features that are highly correlated with one another:
# Candidate predictors: every remaining column except the respondent
# identifier ("seqn") and the encoded target ("diq010.Diabetes").
features1 <- colnames(dm2Train_dummies_independent)
features1 <- features1[!(features1 %in% c("seqn", "diq010.Diabetes"))]
features1
## [1] "riagendr.Male" "riagendr.Female"
## [3] "ridageyr" "ridreth1.MexicanAmerican"
## [5] "ridreth1.Other Hispanic" "ridreth1.Non-Hispanic White"
## [7] "ridreth1.Non-Hispanic Black" "dmdeduc2.Less than 9th grade"
## [9] "dmdeduc2.Grades 9-11th" "dmdeduc2.High school graduate/GED"
## [11] "dmdeduc2.Some college or AA degrees" "dmdmartl.Married"
## [13] "dmdmartl.Widowed" "dmdmartl.Divorced"
## [15] "dmdmartl.Separated" "dmdmartl.Never married"
## [17] "indhhin2.$0-$4,999" "indhhin2.$5,000-$9,999"
## [19] "indhhin2.$10,000-$14,999" "indhhin2.$15,000-$19,999"
## [21] "indhhin2.$20,000-$24,999" "indhhin2.$25,000-$34,999"
## [23] "indhhin2.$45,000-$54,999" "indhhin2.$65,000-$74,999"
## [25] "indhhin2.20,000+" "indhhin2.less than $20,000"
## [27] "indhhin2.$75,000-$99,999" "bmxbmi"
## [29] "lbxglu"
# Pairwise correlation matrix of the candidate feature columns.
cor_matrix <- cor(x = dm2Train_dummies_independent[, features1])
cor_matrix
## riagendr.Male riagendr.Female ridageyr
## riagendr.Male 1.0000000000 -1.0000000000 0.046088493
## riagendr.Female -1.0000000000 1.0000000000 -0.046088493
## ridageyr 0.0460884932 -0.0460884932 1.000000000
## ridreth1.MexicanAmerican -0.0085703283 0.0085703283 -0.022071061
## ridreth1.Other Hispanic -0.0412627007 0.0412627007 0.026956617
## ridreth1.Non-Hispanic White 0.0310058641 -0.0310058641 0.156358779
## ridreth1.Non-Hispanic Black -0.0372062109 0.0372062109 -0.075988046
## dmdeduc2.Less than 9th grade -0.0009220555 0.0009220555 0.147823014
## dmdeduc2.Grades 9-11th 0.0676150181 -0.0676150181 -0.017679412
## dmdeduc2.High school graduate/GED -0.0077741175 0.0077741175 0.002357556
## dmdeduc2.Some college or AA degrees -0.0463613674 0.0463613674 -0.043787659
## dmdmartl.Married 0.0882752805 -0.0882752805 0.105297351
## dmdmartl.Widowed -0.1110870515 0.1110870515 0.359280920
## dmdmartl.Divorced -0.0095450138 0.0095450138 0.160349391
## dmdmartl.Separated -0.0400687435 0.0400687435 0.048347775
## dmdmartl.Never married -0.0084660196 0.0084660196 -0.391576503
## indhhin2.$0-$4,999 0.0333800869 -0.0333800869 -0.005337734
## indhhin2.$5,000-$9,999 0.0202719396 -0.0202719396 0.103081009
## indhhin2.$10,000-$14,999 -0.0280078607 0.0280078607 0.099516500
## indhhin2.$15,000-$19,999 -0.0082798716 0.0082798716 0.047583836
## indhhin2.$20,000-$24,999 -0.0136157655 0.0136157655 0.004340429
## indhhin2.$25,000-$34,999 -0.0301474637 0.0301474637 -0.023087596
## indhhin2.$45,000-$54,999 -0.0014296884 0.0014296884 -0.038052431
## indhhin2.$65,000-$74,999 0.0066689392 -0.0066689392 0.005053337
## indhhin2.20,000+ -0.0334160423 0.0334160423 -0.081919744
## indhhin2.less than $20,000 -0.0141467473 0.0141467473 0.056240125
## indhhin2.$75,000-$99,999 0.0370271813 -0.0370271813 -0.085864313
## bmxbmi -0.0740507417 0.0740507417 0.054940490
## lbxglu 0.0525380850 -0.0525380850 0.221625646
## ridreth1.MexicanAmerican
## riagendr.Male -0.008570328
## riagendr.Female 0.008570328
## ridageyr -0.022071061
## ridreth1.MexicanAmerican 1.000000000
## ridreth1.Other Hispanic -0.177716978
## ridreth1.Non-Hispanic White -0.321970895
## ridreth1.Non-Hispanic Black -0.210901590
## dmdeduc2.Less than 9th grade 0.328273872
## dmdeduc2.Grades 9-11th 0.093925466
## dmdeduc2.High school graduate/GED -0.025177896
## dmdeduc2.Some college or AA degrees -0.115248278
## dmdmartl.Married 0.067258067
## dmdmartl.Widowed 0.052896641
## dmdmartl.Divorced -0.047754034
## dmdmartl.Separated 0.052214662
## dmdmartl.Never married -0.105639997
## indhhin2.$0-$4,999 -0.033075430
## indhhin2.$5,000-$9,999 0.061304872
## indhhin2.$10,000-$14,999 0.067602978
## indhhin2.$15,000-$19,999 0.051828844
## indhhin2.$20,000-$24,999 0.080518667
## indhhin2.$25,000-$34,999 -0.022372295
## indhhin2.$45,000-$54,999 -0.021509342
## indhhin2.$65,000-$74,999 -0.019822460
## indhhin2.20,000+ 0.106103297
## indhhin2.less than $20,000 0.004210595
## indhhin2.$75,000-$99,999 -0.058022583
## bmxbmi 0.116792190
## lbxglu 0.055799978
## ridreth1.Other Hispanic
## riagendr.Male -0.041262701
## riagendr.Female 0.041262701
## ridageyr 0.026956617
## ridreth1.MexicanAmerican -0.177716978
## ridreth1.Other Hispanic 1.000000000
## ridreth1.Non-Hispanic White -0.308856305
## ridreth1.Non-Hispanic Black -0.202311100
## dmdeduc2.Less than 9th grade 0.094191293
## dmdeduc2.Grades 9-11th 0.059711756
## dmdeduc2.High school graduate/GED -0.057372094
## dmdeduc2.Some college or AA degrees -0.026650804
## dmdmartl.Married -0.015939096
## dmdmartl.Widowed -0.032212505
## dmdmartl.Divorced 0.001870325
## dmdmartl.Separated 0.031938656
## dmdmartl.Never married -0.035570780
## indhhin2.$0-$4,999 0.030129995
## indhhin2.$5,000-$9,999 0.011555790
## indhhin2.$10,000-$14,999 0.068372573
## indhhin2.$15,000-$19,999 0.043839553
## indhhin2.$20,000-$24,999 0.010538303
## indhhin2.$25,000-$34,999 -0.031578666
## indhhin2.$45,000-$54,999 -0.012403737
## indhhin2.$65,000-$74,999 0.007335408
## indhhin2.20,000+ -0.015924383
## indhhin2.less than $20,000 0.008793109
## indhhin2.$75,000-$99,999 0.021713079
## bmxbmi 0.031603671
## lbxglu 0.053913405
## ridreth1.Non-Hispanic White
## riagendr.Male 0.031005864
## riagendr.Female -0.031005864
## ridageyr 0.156358779
## ridreth1.MexicanAmerican -0.321970895
## ridreth1.Other Hispanic -0.308856305
## ridreth1.Non-Hispanic White 1.000000000
## ridreth1.Non-Hispanic Black -0.366528210
## dmdeduc2.Less than 9th grade -0.192023323
## dmdeduc2.Grades 9-11th -0.085261491
## dmdeduc2.High school graduate/GED 0.081203139
## dmdeduc2.Some college or AA degrees 0.070062959
## dmdmartl.Married 0.015294283
## dmdmartl.Widowed 0.059593558
## dmdmartl.Divorced 0.084753269
## dmdmartl.Separated -0.050557371
## dmdmartl.Never married -0.074683860
## indhhin2.$0-$4,999 -0.034612554
## indhhin2.$5,000-$9,999 -0.066432877
## indhhin2.$10,000-$14,999 -0.060004455
## indhhin2.$15,000-$19,999 -0.011209740
## indhhin2.$20,000-$24,999 -0.027214942
## indhhin2.$25,000-$34,999 0.040134962
## indhhin2.$45,000-$54,999 0.006137821
## indhhin2.$65,000-$74,999 0.013117724
## indhhin2.20,000+ -0.016619728
## indhhin2.less than $20,000 -0.020652926
## indhhin2.$75,000-$99,999 -0.030853880
## bmxbmi -0.047290232
## lbxglu -0.048149775
## ridreth1.Non-Hispanic Black
## riagendr.Male -0.037206211
## riagendr.Female 0.037206211
## ridageyr -0.075988046
## ridreth1.MexicanAmerican -0.210901590
## ridreth1.Other Hispanic -0.202311100
## ridreth1.Non-Hispanic White -0.366528210
## ridreth1.Non-Hispanic Black 1.000000000
## dmdeduc2.Less than 9th grade -0.078149171
## dmdeduc2.Grades 9-11th -0.004177363
## dmdeduc2.High school graduate/GED 0.080487107
## dmdeduc2.Some college or AA degrees 0.063409064
## dmdmartl.Married -0.159818914
## dmdmartl.Widowed -0.037916187
## dmdmartl.Divorced 0.009980058
## dmdmartl.Separated 0.054691455
## dmdmartl.Never married 0.192682039
## indhhin2.$0-$4,999 0.058020816
## indhhin2.$5,000-$9,999 0.045378363
## indhhin2.$10,000-$14,999 0.027025111
## indhhin2.$15,000-$19,999 -0.046330517
## indhhin2.$20,000-$24,999 0.012999189
## indhhin2.$25,000-$34,999 0.015608909
## indhhin2.$45,000-$54,999 0.026768158
## indhhin2.$65,000-$74,999 -0.002248780
## indhhin2.20,000+ -0.048907525
## indhhin2.less than $20,000 0.021062862
## indhhin2.$75,000-$99,999 0.010089490
## bmxbmi 0.106419542
## lbxglu 0.011610207
## dmdeduc2.Less than 9th grade
## riagendr.Male -0.0009220555
## riagendr.Female 0.0009220555
## ridageyr 0.1478230144
## ridreth1.MexicanAmerican 0.3282738722
## ridreth1.Other Hispanic 0.0941912931
## ridreth1.Non-Hispanic White -0.1920233230
## ridreth1.Non-Hispanic Black -0.0781491710
## dmdeduc2.Less than 9th grade 1.0000000000
## dmdeduc2.Grades 9-11th -0.1321511581
## dmdeduc2.High school graduate/GED -0.2020700027
## dmdeduc2.Some college or AA degrees -0.2269233016
## dmdmartl.Married -0.0107183027
## dmdmartl.Widowed 0.1932578139
## dmdmartl.Divorced -0.0327518205
## dmdmartl.Separated 0.0788352705
## dmdmartl.Never married -0.0951645902
## indhhin2.$0-$4,999 0.0301543083
## indhhin2.$5,000-$9,999 0.1435950588
## indhhin2.$10,000-$14,999 0.1268623876
## indhhin2.$15,000-$19,999 0.0700824303
## indhhin2.$20,000-$24,999 0.0934027377
## indhhin2.$25,000-$34,999 -0.0404739930
## indhhin2.$45,000-$54,999 -0.0354900485
## indhhin2.$65,000-$74,999 -0.0222554498
## indhhin2.20,000+ -0.0192741216
## indhhin2.less than $20,000 0.0585181739
## indhhin2.$75,000-$99,999 -0.0842356013
## bmxbmi 0.0124665369
## lbxglu 0.0788325530
## dmdeduc2.Grades 9-11th
## riagendr.Male 0.067615018
## riagendr.Female -0.067615018
## ridageyr -0.017679412
## ridreth1.MexicanAmerican 0.093925466
## ridreth1.Other Hispanic 0.059711756
## ridreth1.Non-Hispanic White -0.085261491
## ridreth1.Non-Hispanic Black -0.004177363
## dmdeduc2.Less than 9th grade -0.132151158
## dmdeduc2.Grades 9-11th 1.000000000
## dmdeduc2.High school graduate/GED -0.194387846
## dmdeduc2.Some college or AA degrees -0.218296289
## dmdmartl.Married -0.024721213
## dmdmartl.Widowed -0.038120134
## dmdmartl.Divorced 0.011198350
## dmdmartl.Separated 0.070645079
## dmdmartl.Never married -0.005731553
## indhhin2.$0-$4,999 0.101146120
## indhhin2.$5,000-$9,999 0.075501039
## indhhin2.$10,000-$14,999 0.040530516
## indhhin2.$15,000-$19,999 0.048786101
## indhhin2.$20,000-$24,999 0.033182461
## indhhin2.$25,000-$34,999 0.047684818
## indhhin2.$45,000-$54,999 -0.038028797
## indhhin2.$65,000-$74,999 -0.015752518
## indhhin2.20,000+ 0.033202359
## indhhin2.less than $20,000 -0.013741434
## indhhin2.$75,000-$99,999 -0.025204946
## bmxbmi 0.006400369
## lbxglu -0.021872981
## dmdeduc2.High school graduate/GED
## riagendr.Male -0.007774118
## riagendr.Female 0.007774118
## ridageyr 0.002357556
## ridreth1.MexicanAmerican -0.025177896
## ridreth1.Other Hispanic -0.057372094
## ridreth1.Non-Hispanic White 0.081203139
## ridreth1.Non-Hispanic Black 0.080487107
## dmdeduc2.Less than 9th grade -0.202070003
## dmdeduc2.Grades 9-11th -0.194387846
## dmdeduc2.High school graduate/GED 1.000000000
## dmdeduc2.Some college or AA degrees -0.333793002
## dmdmartl.Married -0.041561558
## dmdmartl.Widowed 0.052185083
## dmdmartl.Divorced 0.013911245
## dmdmartl.Separated -0.033930172
## dmdmartl.Never married 0.014957400
## indhhin2.$0-$4,999 0.014936767
## indhhin2.$5,000-$9,999 -0.025510668
## indhhin2.$10,000-$14,999 0.066777014
## indhhin2.$15,000-$19,999 0.078688206
## indhhin2.$20,000-$24,999 -0.028642427
## indhhin2.$25,000-$34,999 0.080105990
## indhhin2.$45,000-$54,999 0.005154086
## indhhin2.$65,000-$74,999 0.038227235
## indhhin2.20,000+ -0.012414401
## indhhin2.less than $20,000 0.007328464
## indhhin2.$75,000-$99,999 -0.067298498
## bmxbmi 0.059502755
## lbxglu 0.038260129
## dmdeduc2.Some college or AA degrees
## riagendr.Male -0.046361367
## riagendr.Female 0.046361367
## ridageyr -0.043787659
## ridreth1.MexicanAmerican -0.115248278
## ridreth1.Other Hispanic -0.026650804
## ridreth1.Non-Hispanic White 0.070062959
## ridreth1.Non-Hispanic Black 0.063409064
## dmdeduc2.Less than 9th grade -0.226923302
## dmdeduc2.Grades 9-11th -0.218296289
## dmdeduc2.High school graduate/GED -0.333793002
## dmdeduc2.Some college or AA degrees 1.000000000
## dmdmartl.Married -0.050585041
## dmdmartl.Widowed -0.054164114
## dmdmartl.Divorced 0.049643221
## dmdmartl.Separated -0.017812444
## dmdmartl.Never married 0.051589769
## indhhin2.$0-$4,999 -0.061416580
## indhhin2.$5,000-$9,999 -0.009211246
## indhhin2.$10,000-$14,999 -0.056464018
## indhhin2.$15,000-$19,999 -0.049835260
## indhhin2.$20,000-$24,999 0.029408267
## indhhin2.$25,000-$34,999 0.017885975
## indhhin2.$45,000-$54,999 0.082630518
## indhhin2.$65,000-$74,999 0.015921913
## indhhin2.20,000+ -0.029218897
## indhhin2.less than $20,000 0.006303120
## indhhin2.$75,000-$99,999 0.024863521
## bmxbmi 0.071803429
## lbxglu -0.040285393
## dmdmartl.Married dmdmartl.Widowed
## riagendr.Male 0.08827528 -0.111087051
## riagendr.Female -0.08827528 0.111087051
## ridageyr 0.10529735 0.359280920
## ridreth1.MexicanAmerican 0.06725807 0.052896641
## ridreth1.Other Hispanic -0.01593910 -0.032212505
## ridreth1.Non-Hispanic White 0.01529428 0.059593558
## ridreth1.Non-Hispanic Black -0.15981891 -0.037916187
## dmdeduc2.Less than 9th grade -0.01071830 0.193257814
## dmdeduc2.Grades 9-11th -0.02472121 -0.038120134
## dmdeduc2.High school graduate/GED -0.04156156 0.052185083
## dmdeduc2.Some college or AA degrees -0.05058504 -0.054164114
## dmdmartl.Married 1.00000000 -0.289838225
## dmdmartl.Widowed -0.28983822 1.000000000
## dmdmartl.Divorced -0.35357750 -0.099608674
## dmdmartl.Separated -0.19212731 -0.054125464
## dmdmartl.Never married -0.47425428 -0.133605334
## indhhin2.$0-$4,999 -0.11669433 0.028159045
## indhhin2.$5,000-$9,999 -0.10623715 0.138000043
## indhhin2.$10,000-$14,999 -0.10652619 0.113977642
## indhhin2.$15,000-$19,999 -0.13782316 0.026278741
## indhhin2.$20,000-$24,999 -0.03946540 -0.002073153
## indhhin2.$25,000-$34,999 -0.02589437 0.040742036
## indhhin2.$45,000-$54,999 0.07449836 -0.034556429
## indhhin2.$65,000-$74,999 0.05005180 -0.042747474
## indhhin2.20,000+ -0.05884298 -0.051180644
## indhhin2.less than $20,000 -0.05129407 0.027658261
## indhhin2.$75,000-$99,999 0.06660687 -0.072779967
## bmxbmi 0.02601791 0.013266915
## lbxglu 0.02798044 0.047303877
## dmdmartl.Divorced dmdmartl.Separated
## riagendr.Male -0.009545014 -0.040068744
## riagendr.Female 0.009545014 0.040068744
## ridageyr 0.160349391 0.048347775
## ridreth1.MexicanAmerican -0.047754034 0.052214662
## ridreth1.Other Hispanic 0.001870325 0.031938656
## ridreth1.Non-Hispanic White 0.084753269 -0.050557371
## ridreth1.Non-Hispanic Black 0.009980058 0.054691455
## dmdeduc2.Less than 9th grade -0.032751820 0.078835270
## dmdeduc2.Grades 9-11th 0.011198350 0.070645079
## dmdeduc2.High school graduate/GED 0.013911245 -0.033930172
## dmdeduc2.Some college or AA degrees 0.049643221 -0.017812444
## dmdmartl.Married -0.353577501 -0.192127306
## dmdmartl.Widowed -0.099608674 -0.054125464
## dmdmartl.Divorced 1.000000000 -0.066028372
## dmdmartl.Separated -0.066028372 1.000000000
## dmdmartl.Never married -0.162986922 -0.088564001
## indhhin2.$0-$4,999 0.105461562 -0.005041179
## indhhin2.$5,000-$9,999 0.053565917 0.047206824
## indhhin2.$10,000-$14,999 0.034257498 0.077752191
## indhhin2.$15,000-$19,999 0.074854585 0.050752918
## indhhin2.$20,000-$24,999 -0.013293014 0.082296889
## indhhin2.$25,000-$34,999 -0.001623868 0.002469709
## indhhin2.$45,000-$54,999 0.024608971 -0.061061909
## indhhin2.$65,000-$74,999 -0.011975714 0.001674479
## indhhin2.20,000+ -0.029507199 -0.005940246
## indhhin2.less than $20,000 -0.011876911 0.039301796
## indhhin2.$75,000-$99,999 -0.064856057 -0.069025777
## bmxbmi 0.030141728 0.056879393
## lbxglu 0.091058683 0.029194977
## dmdmartl.Never married indhhin2.$0-$4,999
## riagendr.Male -0.008466020 0.033380087
## riagendr.Female 0.008466020 -0.033380087
## ridageyr -0.391576503 -0.005337734
## ridreth1.MexicanAmerican -0.105639997 -0.033075430
## ridreth1.Other Hispanic -0.035570780 0.030129995
## ridreth1.Non-Hispanic White -0.074683860 -0.034612554
## ridreth1.Non-Hispanic Black 0.192682039 0.058020816
## dmdeduc2.Less than 9th grade -0.095164590 0.030154308
## dmdeduc2.Grades 9-11th -0.005731553 0.101146120
## dmdeduc2.High school graduate/GED 0.014957400 0.014936767
## dmdeduc2.Some college or AA degrees 0.051589769 -0.061416580
## dmdmartl.Married -0.474254283 -0.116694334
## dmdmartl.Widowed -0.133605334 0.028159045
## dmdmartl.Divorced -0.162986922 0.105461562
## dmdmartl.Separated -0.088564001 -0.005041179
## dmdmartl.Never married 1.000000000 0.052759404
## indhhin2.$0-$4,999 0.052759404 1.000000000
## indhhin2.$5,000-$9,999 0.022907899 -0.039986625
## indhhin2.$10,000-$14,999 0.020394547 -0.049452109
## indhhin2.$15,000-$19,999 0.014224570 -0.052321316
## indhhin2.$20,000-$24,999 -0.025338713 -0.053562472
## indhhin2.$25,000-$34,999 0.031583171 -0.073892443
## indhhin2.$45,000-$54,999 -0.023905784 -0.056882803
## indhhin2.$65,000-$74,999 -0.035848411 -0.050099561
## indhhin2.20,000+ 0.116319739 -0.031604563
## indhhin2.less than $20,000 0.027156280 -0.026040119
## indhhin2.$75,000-$99,999 0.009495700 -0.064301620
## bmxbmi -0.106358679 0.040396520
## lbxglu -0.127696270 -0.019100099
## indhhin2.$5,000-$9,999
## riagendr.Male 0.020271940
## riagendr.Female -0.020271940
## ridageyr 0.103081009
## ridreth1.MexicanAmerican 0.061304872
## ridreth1.Other Hispanic 0.011555790
## ridreth1.Non-Hispanic White -0.066432877
## ridreth1.Non-Hispanic Black 0.045378363
## dmdeduc2.Less than 9th grade 0.143595059
## dmdeduc2.Grades 9-11th 0.075501039
## dmdeduc2.High school graduate/GED -0.025510668
## dmdeduc2.Some college or AA degrees -0.009211246
## dmdmartl.Married -0.106237151
## dmdmartl.Widowed 0.138000043
## dmdmartl.Divorced 0.053565917
## dmdmartl.Separated 0.047206824
## dmdmartl.Never married 0.022907899
## indhhin2.$0-$4,999 -0.039986625
## indhhin2.$5,000-$9,999 1.000000000
## indhhin2.$10,000-$14,999 -0.063510172
## indhhin2.$15,000-$19,999 -0.067195027
## indhhin2.$20,000-$24,999 -0.068789013
## indhhin2.$25,000-$34,999 -0.094898314
## indhhin2.$45,000-$54,999 -0.073053236
## indhhin2.$65,000-$74,999 -0.064341679
## indhhin2.20,000+ -0.040588991
## indhhin2.less than $20,000 -0.033442708
## indhhin2.$75,000-$99,999 -0.082581047
## bmxbmi 0.009554692
## lbxglu -0.005718597
## indhhin2.$10,000-$14,999
## riagendr.Male -0.02800786
## riagendr.Female 0.02800786
## ridageyr 0.09951650
## ridreth1.MexicanAmerican 0.06760298
## ridreth1.Other Hispanic 0.06837257
## ridreth1.Non-Hispanic White -0.06000445
## ridreth1.Non-Hispanic Black 0.02702511
## dmdeduc2.Less than 9th grade 0.12686239
## dmdeduc2.Grades 9-11th 0.04053052
## dmdeduc2.High school graduate/GED 0.06677701
## dmdeduc2.Some college or AA degrees -0.05646402
## dmdmartl.Married -0.10652619
## dmdmartl.Widowed 0.11397764
## dmdmartl.Divorced 0.03425750
## dmdmartl.Separated 0.07775219
## dmdmartl.Never married 0.02039455
## indhhin2.$0-$4,999 -0.04945211
## indhhin2.$5,000-$9,999 -0.06351017
## indhhin2.$10,000-$14,999 1.00000000
## indhhin2.$15,000-$19,999 -0.08310118
## indhhin2.$20,000-$24,999 -0.08507249
## indhhin2.$25,000-$34,999 -0.11736229
## indhhin2.$45,000-$54,999 -0.09034612
## indhhin2.$65,000-$74,999 -0.07957240
## indhhin2.20,000+ -0.05019706
## indhhin2.less than $20,000 -0.04135914
## indhhin2.$75,000-$99,999 -0.10212932
## bmxbmi -0.01186220
## lbxglu 0.00247967
## indhhin2.$15,000-$19,999
## riagendr.Male -0.008279872
## riagendr.Female 0.008279872
## ridageyr 0.047583836
## ridreth1.MexicanAmerican 0.051828844
## ridreth1.Other Hispanic 0.043839553
## ridreth1.Non-Hispanic White -0.011209740
## ridreth1.Non-Hispanic Black -0.046330517
## dmdeduc2.Less than 9th grade 0.070082430
## dmdeduc2.Grades 9-11th 0.048786101
## dmdeduc2.High school graduate/GED 0.078688206
## dmdeduc2.Some college or AA degrees -0.049835260
## dmdmartl.Married -0.137823165
## dmdmartl.Widowed 0.026278741
## dmdmartl.Divorced 0.074854585
## dmdmartl.Separated 0.050752918
## dmdmartl.Never married 0.014224570
## indhhin2.$0-$4,999 -0.052321316
## indhhin2.$5,000-$9,999 -0.067195027
## indhhin2.$10,000-$14,999 -0.083101181
## indhhin2.$15,000-$19,999 1.000000000
## indhhin2.$20,000-$24,999 -0.090008390
## indhhin2.$25,000-$34,999 -0.124171637
## indhhin2.$45,000-$54,999 -0.095587998
## indhhin2.$65,000-$74,999 -0.084189185
## indhhin2.20,000+ -0.053109495
## indhhin2.less than $20,000 -0.043758794
## indhhin2.$75,000-$99,999 -0.108054857
## bmxbmi -0.021396061
## lbxglu 0.064868950
## indhhin2.$20,000-$24,999
## riagendr.Male -0.013615765
## riagendr.Female 0.013615765
## ridageyr 0.004340429
## ridreth1.MexicanAmerican 0.080518667
## ridreth1.Other Hispanic 0.010538303
## ridreth1.Non-Hispanic White -0.027214942
## ridreth1.Non-Hispanic Black 0.012999189
## dmdeduc2.Less than 9th grade 0.093402738
## dmdeduc2.Grades 9-11th 0.033182461
## dmdeduc2.High school graduate/GED -0.028642427
## dmdeduc2.Some college or AA degrees 0.029408267
## dmdmartl.Married -0.039465404
## dmdmartl.Widowed -0.002073153
## dmdmartl.Divorced -0.013293014
## dmdmartl.Separated 0.082296889
## dmdmartl.Never married -0.025338713
## indhhin2.$0-$4,999 -0.053562472
## indhhin2.$5,000-$9,999 -0.068789013
## indhhin2.$10,000-$14,999 -0.085072490
## indhhin2.$15,000-$19,999 -0.090008390
## indhhin2.$20,000-$24,999 1.000000000
## indhhin2.$25,000-$34,999 -0.127117211
## indhhin2.$45,000-$54,999 -0.097855517
## indhhin2.$65,000-$74,999 -0.086186303
## indhhin2.20,000+ -0.054369347
## indhhin2.less than $20,000 -0.044796831
## indhhin2.$75,000-$99,999 -0.110618112
## bmxbmi 0.035630116
## lbxglu 0.051934542
## indhhin2.$25,000-$34,999
## riagendr.Male -0.030147464
## riagendr.Female 0.030147464
## ridageyr -0.023087596
## ridreth1.MexicanAmerican -0.022372295
## ridreth1.Other Hispanic -0.031578666
## ridreth1.Non-Hispanic White 0.040134962
## ridreth1.Non-Hispanic Black 0.015608909
## dmdeduc2.Less than 9th grade -0.040473993
## dmdeduc2.Grades 9-11th 0.047684818
## dmdeduc2.High school graduate/GED 0.080105990
## dmdeduc2.Some college or AA degrees 0.017885975
## dmdmartl.Married -0.025894374
## dmdmartl.Widowed 0.040742036
## dmdmartl.Divorced -0.001623868
## dmdmartl.Separated 0.002469709
## dmdmartl.Never married 0.031583171
## indhhin2.$0-$4,999 -0.073892443
## indhhin2.$5,000-$9,999 -0.094898314
## indhhin2.$10,000-$14,999 -0.117362287
## indhhin2.$15,000-$19,999 -0.124171637
## indhhin2.$20,000-$24,999 -0.127117211
## indhhin2.$25,000-$34,999 1.000000000
## indhhin2.$45,000-$54,999 -0.134997191
## indhhin2.$65,000-$74,999 -0.118898854
## indhhin2.20,000+ -0.075005573
## indhhin2.less than $20,000 -0.061799749
## indhhin2.$75,000-$99,999 -0.152603909
## bmxbmi -0.018310770
## lbxglu 0.012113054
## indhhin2.$45,000-$54,999
## riagendr.Male -0.001429688
## riagendr.Female 0.001429688
## ridageyr -0.038052431
## ridreth1.MexicanAmerican -0.021509342
## ridreth1.Other Hispanic -0.012403737
## ridreth1.Non-Hispanic White 0.006137821
## ridreth1.Non-Hispanic Black 0.026768158
## dmdeduc2.Less than 9th grade -0.035490049
## dmdeduc2.Grades 9-11th -0.038028797
## dmdeduc2.High school graduate/GED 0.005154086
## dmdeduc2.Some college or AA degrees 0.082630518
## dmdmartl.Married 0.074498358
## dmdmartl.Widowed -0.034556429
## dmdmartl.Divorced 0.024608971
## dmdmartl.Separated -0.061061909
## dmdmartl.Never married -0.023905784
## indhhin2.$0-$4,999 -0.056882803
## indhhin2.$5,000-$9,999 -0.073053236
## indhhin2.$10,000-$14,999 -0.090346124
## indhhin2.$15,000-$19,999 -0.095587998
## indhhin2.$20,000-$24,999 -0.097855517
## indhhin2.$25,000-$34,999 -0.134997191
## indhhin2.$45,000-$54,999 1.000000000
## indhhin2.$65,000-$74,999 -0.091528982
## indhhin2.20,000+ -0.057739696
## indhhin2.less than $20,000 -0.047573781
## indhhin2.$75,000-$99,999 -0.117475316
## bmxbmi 0.034408507
## lbxglu 0.017172744
## indhhin2.$65,000-$74,999 indhhin2.20,000+
## riagendr.Male 0.006668939 -0.033416042
## riagendr.Female -0.006668939 0.033416042
## ridageyr 0.005053337 -0.081919744
## ridreth1.MexicanAmerican -0.019822460 0.106103297
## ridreth1.Other Hispanic 0.007335408 -0.015924383
## ridreth1.Non-Hispanic White 0.013117724 -0.016619728
## ridreth1.Non-Hispanic Black -0.002248780 -0.048907525
## dmdeduc2.Less than 9th grade -0.022255450 -0.019274122
## dmdeduc2.Grades 9-11th -0.015752518 0.033202359
## dmdeduc2.High school graduate/GED 0.038227235 -0.012414401
## dmdeduc2.Some college or AA degrees 0.015921913 -0.029218897
## dmdmartl.Married 0.050051795 -0.058842981
## dmdmartl.Widowed -0.042747474 -0.051180644
## dmdmartl.Divorced -0.011975714 -0.029507199
## dmdmartl.Separated 0.001674479 -0.005940246
## dmdmartl.Never married -0.035848411 0.116319739
## indhhin2.$0-$4,999 -0.050099561 -0.031604563
## indhhin2.$5,000-$9,999 -0.064341679 -0.040588991
## indhhin2.$10,000-$14,999 -0.079572401 -0.050197065
## indhhin2.$15,000-$19,999 -0.084189185 -0.053109495
## indhhin2.$20,000-$24,999 -0.086186303 -0.054369347
## indhhin2.$25,000-$34,999 -0.118898854 -0.075005573
## indhhin2.$45,000-$54,999 -0.091528982 -0.057739696
## indhhin2.$65,000-$74,999 1.000000000 -0.050854271
## indhhin2.20,000+ -0.050854271 1.000000000
## indhhin2.less than $20,000 -0.041900635 -0.026432392
## indhhin2.$75,000-$99,999 -0.103466452 -0.065270272
## bmxbmi 0.025660367 0.025320599
## lbxglu -0.064348404 0.012292850
## indhhin2.less than $20,000
## riagendr.Male -0.014146747
## riagendr.Female 0.014146747
## ridageyr 0.056240125
## ridreth1.MexicanAmerican 0.004210595
## ridreth1.Other Hispanic 0.008793109
## ridreth1.Non-Hispanic White -0.020652926
## ridreth1.Non-Hispanic Black 0.021062862
## dmdeduc2.Less than 9th grade 0.058518174
## dmdeduc2.Grades 9-11th -0.013741434
## dmdeduc2.High school graduate/GED 0.007328464
## dmdeduc2.Some college or AA degrees 0.006303120
## dmdmartl.Married -0.051294066
## dmdmartl.Widowed 0.027658261
## dmdmartl.Divorced -0.011876911
## dmdmartl.Separated 0.039301796
## dmdmartl.Never married 0.027156280
## indhhin2.$0-$4,999 -0.026040119
## indhhin2.$5,000-$9,999 -0.033442708
## indhhin2.$10,000-$14,999 -0.041359140
## indhhin2.$15,000-$19,999 -0.043758794
## indhhin2.$20,000-$24,999 -0.044796831
## indhhin2.$25,000-$34,999 -0.061799749
## indhhin2.$45,000-$54,999 -0.047573781
## indhhin2.$65,000-$74,999 -0.041900635
## indhhin2.20,000+ -0.026432392
## indhhin2.less than $20,000 1.000000000
## indhhin2.$75,000-$99,999 -0.053778489
## bmxbmi 0.014670312
## lbxglu 0.047248443
## indhhin2.$75,000-$99,999 bmxbmi
## riagendr.Male 0.037027181 -0.074050742
## riagendr.Female -0.037027181 0.074050742
## ridageyr -0.085864313 0.054940490
## ridreth1.MexicanAmerican -0.058022583 0.116792190
## ridreth1.Other Hispanic 0.021713079 0.031603671
## ridreth1.Non-Hispanic White -0.030853880 -0.047290232
## ridreth1.Non-Hispanic Black 0.010089490 0.106419542
## dmdeduc2.Less than 9th grade -0.084235601 0.012466537
## dmdeduc2.Grades 9-11th -0.025204946 0.006400369
## dmdeduc2.High school graduate/GED -0.067298498 0.059502755
## dmdeduc2.Some college or AA degrees 0.024863521 0.071803429
## dmdmartl.Married 0.066606870 0.026017907
## dmdmartl.Widowed -0.072779967 0.013266915
## dmdmartl.Divorced -0.064856057 0.030141728
## dmdmartl.Separated -0.069025777 0.056879393
## dmdmartl.Never married 0.009495700 -0.106358679
## indhhin2.$0-$4,999 -0.064301620 0.040396520
## indhhin2.$5,000-$9,999 -0.082581047 0.009554692
## indhhin2.$10,000-$14,999 -0.102129322 -0.011862198
## indhhin2.$15,000-$19,999 -0.108054857 -0.021396061
## indhhin2.$20,000-$24,999 -0.110618112 0.035630116
## indhhin2.$25,000-$34,999 -0.152603909 -0.018310770
## indhhin2.$45,000-$54,999 -0.117475316 0.034408507
## indhhin2.$65,000-$74,999 -0.103466452 0.025660367
## indhhin2.20,000+ -0.065270272 0.025320599
## indhhin2.less than $20,000 -0.053778489 0.014670312
## indhhin2.$75,000-$99,999 1.000000000 -0.002492344
## bmxbmi -0.002492344 1.000000000
## lbxglu -0.007916544 0.128391900
## lbxglu
## riagendr.Male 0.052538085
## riagendr.Female -0.052538085
## ridageyr 0.221625646
## ridreth1.MexicanAmerican 0.055799978
## ridreth1.Other Hispanic 0.053913405
## ridreth1.Non-Hispanic White -0.048149775
## ridreth1.Non-Hispanic Black 0.011610207
## dmdeduc2.Less than 9th grade 0.078832553
## dmdeduc2.Grades 9-11th -0.021872981
## dmdeduc2.High school graduate/GED 0.038260129
## dmdeduc2.Some college or AA degrees -0.040285393
## dmdmartl.Married 0.027980439
## dmdmartl.Widowed 0.047303877
## dmdmartl.Divorced 0.091058683
## dmdmartl.Separated 0.029194977
## dmdmartl.Never married -0.127696270
## indhhin2.$0-$4,999 -0.019100099
## indhhin2.$5,000-$9,999 -0.005718597
## indhhin2.$10,000-$14,999 0.002479670
## indhhin2.$15,000-$19,999 0.064868950
## indhhin2.$20,000-$24,999 0.051934542
## indhhin2.$25,000-$34,999 0.012113054
## indhhin2.$45,000-$54,999 0.017172744
## indhhin2.$65,000-$74,999 -0.064348404
## indhhin2.20,000+ 0.012292850
## indhhin2.less than $20,000 0.047248443
## indhhin2.$75,000-$99,999 -0.007916544
## bmxbmi 0.128391900
## lbxglu 1.000000000
# Ask caret which columns to drop so that no remaining pair of features is
# correlated beyond the 0.9 cutoff; the result is a vector of column positions.
cor_high <- findCorrelation(cor_matrix, cutoff = 0.9)
cor_high
## [1] 1
# Translate the flagged positions into feature names.
high_cor_remove <- rownames(cor_matrix)[cor_high]
high_cor_remove
## [1] "riagendr.Male"
# Final feature list: the candidates minus the highly correlated ones.
features2 <- setdiff(x = features1, y = high_cor_remove)
# Training table restricted to the identifier, the target and the kept features.
dm2_Train_independent_non_corr <- dm2Train_dummies_independent[, c("seqn", "diq010.Diabetes", features2)]
\(~\)
\(~\)
\(~\)
\(~\)
# Learn the "range" rescaling parameters from the training features only, so
# that no information from the test set leaks into the transformation.
preProcValues.range <- preProcess(
  x = dm2_Train_independent_non_corr[, features2],
  method = "range"
)

# Rescale the training table with the learned parameters.
dm2Train_transformed.range <- predict(preProcValues.range, newdata = dm2_Train_independent_non_corr)

# Encode the test set with the encoder fitted on the training set, then apply
# the same training-derived rescaling to it.
dm2Test_dummies <- predict(dummyVars.dm2Train, newdata = dm2.test) %>%
  as_tibble()
dm2Test_transformed.range <- predict(preProcValues.range, newdata = dm2Test_dummies) %>%
  as_tibble()
\(~\)
\(~\)
\(~\)
# Make sure the 'class' package (used below for knn()) is installed, then
# attach it.
install_if_not(list.of.packages = 'class')
## [1] "the package 'class' is already installed"
library(package = 'class')
# Rows and columns of the training set.
dim(dm2.train)
## [1] 1126 10
dm2Test_transformed.range$knn_pred <- knn(dm2Train_transformed.range[,features2] ,
dm2Test_transformed.range[,features2] ,
cl = dm2Train_transformed.range$diq010.Diabetes,
k = 5)
# Compact column-by-column preview of the raw (un-encoded) test partition.
dm2.test %>% glimpse()
## Rows: 750
## Columns: 10
## $ seqn <dbl> 83734, 83737, 83757, 83761, 83789, 83820, 83822, 83823, 83...
## $ riagendr <fct> Male, Female, Female, Female, Male, Male, Female, Female, ...
## $ ridageyr <dbl> 78, 72, 57, 24, 66, 70, 20, 29, 69, 71, 37, 49, 41, 54, 80...
## $ ridreth1 <fct> Non-Hispanic White, MexicanAmerican, Other Hispanic, Other...
## $ dmdeduc2 <fct> High school graduate/GED, Grades 9-11th, Less than 9th gra...
## $ dmdmartl <fct> Married, Separated, Separated, Never married, Living with ...
## $ indhhin2 <fct> "$20,000-$24,999", "$75,000-$99,999", "$20,000-$24,999", "...
## $ bmxbmi <dbl> 28.8, 28.6, 35.4, 25.3, 34.0, 27.0, 22.2, 29.7, 28.2, 27.6...
## $ diq010 <fct> Diabetes, No Diabetes, Diabetes, No Diabetes, No Diabetes,...
## $ lbxglu <dbl> 84, 107, 398, 95, 113, 94, 80, 102, 105, 76, 79, 126, 110,...
# Store the 0/1 target as a factor so it can be compared with the factor
# predictions in the confusion matrix computed below.
dm2Test_transformed.range$diq010.Diabetes <-
  as.factor(dm2Test_transformed.range$diq010.Diabetes)
# Preview the encoded, scaled test set including the new knn_pred column.
dm2Test_transformed.range %>% glimpse()
## Rows: 750
## Columns: 39
## $ seqn <dbl> 83734, 83737, 83757, 83761, 8...
## $ riagendr.Male <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 1, 1,...
## $ riagendr.Female <dbl> 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,...
## $ ridageyr <dbl> 0.96666667, 0.86666667, 0.616...
## $ ridreth1.MexicanAmerican <dbl> 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,...
## $ `ridreth1.Other Hispanic` <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,...
## $ `ridreth1.Non-Hispanic White` <dbl> 1, 0, 0, 0, 1, 1, 0, 0, 0, 0,...
## $ `ridreth1.Non-Hispanic Black` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,...
## $ ridreth1.Other <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...
## $ `dmdeduc2.Less than 9th grade` <dbl> 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,...
## $ `dmdeduc2.Grades 9-11th` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `dmdeduc2.High school graduate/GED` <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,...
## $ `dmdeduc2.Some college or AA degrees` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,...
## $ `dmdeduc2.College grad or above` <dbl> 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,...
## $ dmdmartl.Married <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,...
## $ dmdmartl.Widowed <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ dmdmartl.Divorced <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ dmdmartl.Separated <dbl> 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,...
## $ `dmdmartl.Never married` <dbl> 0, 0, 0, 1, 0, 0, 1, 1, 1, 0,...
## $ `dmdmartl.Living with partner` <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,...
## $ `indhhin2.$0-$4,999` <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$5,000-$9,999` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$10,000-$14,999` <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,...
## $ `indhhin2.$15,000-$19,999` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$20,000-$24,999` <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$25,000-$34,999` <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,...
## $ `indhhin2.$35,000-$44,999` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$45,000-$54,999` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$55,000-$64,999` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$65,000-$74,999` <dbl> 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,...
## $ `indhhin2.20,000+` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.less than $20,000` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$75,000-$99,999` <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ `indhhin2.$100,000+` <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ bmxbmi <dbl> 0.25779626, 0.25363825, 0.395...
## $ diq010.Diabetes <fct> 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,...
## $ `diq010.No Diabetes` <dbl> 0, 1, 0, 1, 1, 1, 1, 1, 1, 0,...
## $ lbxglu <dbl> 0.07925408, 0.13286713, 0.811...
## $ knn_pred <fct> 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,...
library('yardstick')
## For binary classification, the first factor level is assumed to be the event.
## Set the global option `yardstick.event_first` to `FALSE` to change this.
##
## Attaching package: 'yardstick'
## The following objects are masked from 'package:caret':
##
## precision, recall
## The following object is masked from 'package:readr':
##
## spec
# Both diq010.Diabetes and knn_pred carry the levels "0", "1", and yardstick
# treats the FIRST factor level as the event. Left as-is, sens / ppv / recall /
# f_meas would describe how well "no diabetes" is detected. Put "1" (diabetes)
# first in both factors so the metrics describe detection of diabetes.
dm2Test_transformed.range <- dm2Test_transformed.range %>%
  mutate(
    diq010.Diabetes = factor(diq010.Diabetes, levels = c("1", "0")),
    knn_pred = factor(knn_pred, levels = c("1", "0"))
  )
# Cross-tabulate predictions (rows) against the truth (columns).
knn_conf_mat <- conf_mat(
  dm2Test_transformed.range,
  truth = diq010.Diabetes,
  estimate = knn_pred
)
knn_conf_mat
## Truth
## Prediction 1 0
## 1 14 30
## 0 98 608
# Derive the standard binary-classification metrics from the same table.
summary(knn_conf_mat)
## # A tibble: 13 x 3
## .metric .estimator .estimate
## <chr> <chr> <dbl>
## 1 accuracy binary 0.829
## 2 kap binary 0.104
## 3 sens binary 0.125
## 4 spec binary 0.953
## 5 ppv binary 0.318
## 6 npv binary 0.861
## 7 mcc binary 0.118
## 8 j_index binary 0.0780
## 9 bal_accuracy binary 0.539
## 10 detection_prevalence binary 0.0587
## 11 precision binary 0.318
## 12 recall binary 0.125
## 13 f_meas binary 0.179
\(~\)
# Show the source of every chunk in the knitted report.
knitr::opts_chunk$set(echo = TRUE)
# Load the diabetes population data set from its serialized .RDS file.
# NOTE(review): this is an absolute path on one machine; confirm it exists
# wherever the script is run.
diab_pop <- readRDS(
  file = "C:/Users/jkyle/Documents/GitHub/Intro_Jeff_Data_Science/DATA/diab_pop.RDS"
)
#### Variable in Data - Definition - Data Type
##### seqn - Respondent sequence number - Identifier
##### riagendr - Gender - Categorical
##### ridageyr - Age in years at screening - Continuous / Numerical
##### ridreth1 - Race/Hispanic origin - Categorical
##### dmdeduc2 - Education level - Adults 20+ - Categorical
##### dmdmartl - Marital status - Categorical
##### indhhin2 - Annual household income - Categorical
##### bmxbmi - Body Mass Index (kg/m**2) - Continuous / Numerical
##### diq010 - Doctor diagnosed diabetes - Categorical / Target
##### lbxglu - Fasting Glucose (mg/dL) - Continuous / Numerical
# Install any of the requested packages that are not yet available, and report
# the ones that already are.
#
# list.of.packages: character vector of package names.
#
# Called for its side effects (installing / printing); returns NULL invisibly.
install_if_not <- function(list.of.packages) {
  # system.file() returns "" for a package that cannot be found. This avoids
  # installed.packages(), which reads metadata for every installed package and
  # is documented as unsuitable for a simple "is it installed?" check.
  is_installed <- vapply(
    list.of.packages,
    function(pkg) nzchar(system.file(package = pkg)),
    logical(1),
    USE.NAMES = FALSE
  )
  new.packages <- list.of.packages[!is_installed]
  present.packages <- list.of.packages[is_installed]

  if (length(new.packages) > 0) {
    install.packages(new.packages)
  }
  # Report already-installed packages even when others had to be installed,
  # and print nothing at all for an empty request.
  if (length(present.packages) > 0) {
    print(paste0("the package '", present.packages, "' is already installed"))
  }
  invisible(NULL)
}
library("tidyverse")
# Drop every row that has a missing value in any column.
diab_pop.no_na_vals <- na.omit(diab_pop)
library("caret")
# Fix the RNG seed so the partition below is the same on every run;
# to randomize you may use: set.seed(Sys.time())
set.seed(8675309)
# createDataPartition draws one training sample covering 60% of the rows,
# based on the outcome diq010, and returns the selected row positions as a
# matrix (list = FALSE).
trainIndex <- createDataPartition(
  y = diab_pop.no_na_vals$diq010,
  times = 1,
  p = 0.6,
  list = FALSE
)
# Rows picked by trainIndex form the training set; all remaining rows form the
# test set.
dm2.test <- diab_pop.no_na_vals[-trainIndex, ]
dm2.train <- diab_pop.no_na_vals[trainIndex, ]
# Fit a dummy-variable encoder on every column of the training set.
dummyVars.dm2Train <- dummyVars( ~. , data = dm2.train)
# Encode the training set with it.
dm2Train_dummies <- as_tibble( predict(dummyVars.dm2Train, dm2.train) )
# Look for columns that are exact linear combinations of other columns.
comboInformation <- findLinearCombos(dm2Train_dummies)
comboInformation
# Drop the columns caret suggests removing. Select the columns to KEEP instead
# of negating `remove`: if nothing needs removing, `remove` is empty and
# `[, -comboInformation$remove]` would select zero columns rather than all.
dm2Train_dummies_independent <- dm2Train_dummies[
  ,
  setdiff(seq_len(ncol(dm2Train_dummies)), comboInformation$remove)
]
# Candidate predictors: every remaining column except the respondent
# identifier and ANY dummy column derived from the target diq010. Excluding
# only "diq010.Diabetes" by name would let its complement
# "diq010.No Diabetes" through as a feature (target leakage) whenever the
# linear-combination filter above did not happen to drop it.
features1 <- colnames(dm2Train_dummies_independent)
features1 <- features1[features1 != "seqn" & !startsWith(features1, "diq010.")]
features1
# Pairwise correlations between all candidate features.
cor_matrix <- dm2Train_dummies_independent[, features1] %>% cor()
print(cor_matrix)
# Flag features to drop so that no remaining pair has an absolute pairwise
# correlation above the 0.9 cutoff.
cor_high <- findCorrelation(cor_matrix, cutoff = 0.9)
print(cor_high)
# Translate the flagged positions into feature names.
high_cor_remove <- rownames(cor_matrix)[cor_high]
print(high_cor_remove)
# Keep the surviving features, together with the respondent identifier and
# the target column.
features2 <- setdiff(features1, high_cor_remove)
dm2_Train_independent_non_corr <- dm2Train_dummies_independent[
  ,
  c("seqn", "diq010.Diabetes", features2)
]
# Learn the range-scaling parameters from the training features only.
preProcValues.range <- preProcess(
  dm2_Train_independent_non_corr[, features2],
  method = "range"
)
# Apply the fitted scaling to the training data.
dm2Train_transformed.range <- predict(
  preProcValues.range,
  newdata = dm2_Train_independent_non_corr
)
# Encode the test set with the dummy-variable encoder fitted on the training
# set, then apply the training-derived scaling to it.
dm2Test_dummies <- dummyVars.dm2Train %>%
  predict(newdata = dm2.test) %>%
  as_tibble()
dm2Test_transformed.range <- preProcValues.range %>%
  predict(newdata = dm2Test_dummies) %>%
  as_tibble()
# Make sure the 'class' package (which provides knn) is installed, then load it.
install_if_not("class")
library("class")
# Rows and columns of the training partition.
print(dim(dm2.train))
# Classify every test row from its 5 nearest training rows, using only the
# retained feature columns; the training labels are the 0/1 diabetes dummy.
dm2Test_transformed.range[["knn_pred"]] <- knn(
  train = dm2Train_transformed.range[, features2],
  test = dm2Test_transformed.range[, features2],
  cl = dm2Train_transformed.range[["diq010.Diabetes"]],
  k = 5
)
# Compact column-by-column preview of the raw (un-encoded) test partition.
dm2.test %>% glimpse()
# Store the 0/1 target as a factor so it can be compared with the factor
# predictions in the confusion matrix computed below.
dm2Test_transformed.range$diq010.Diabetes <-
  as.factor(dm2Test_transformed.range$diq010.Diabetes)
# Preview the encoded, scaled test set including the new knn_pred column.
dm2Test_transformed.range %>% glimpse()
library('yardstick')
# Both diq010.Diabetes and knn_pred carry the levels "0", "1", and yardstick
# treats the FIRST factor level as the event. Left as-is, sens / ppv / recall /
# f_meas would describe how well "no diabetes" is detected. Put "1" (diabetes)
# first in both factors so the metrics describe detection of diabetes.
dm2Test_transformed.range <- dm2Test_transformed.range %>%
  mutate(
    diq010.Diabetes = factor(diq010.Diabetes, levels = c("1", "0")),
    knn_pred = factor(knn_pred, levels = c("1", "0"))
  )
# Cross-tabulate predictions (rows) against the truth (columns).
knn_conf_mat <- conf_mat(
  dm2Test_transformed.range,
  truth = diq010.Diabetes,
  estimate = knn_pred
)
knn_conf_mat
# Derive the standard binary-classification metrics from the same table.
summary(knn_conf_mat)