library(nhanesA)
## Warning: package 'nhanesA' was built under R version 4.1.2
library(knitr)
## Warning: package 'knitr' was built under R version 4.1.2
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.1.2
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'dplyr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plyr)
## Warning: package 'plyr' was built under R version 4.1.2
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.1.2
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.1.2
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
library(car)
## Warning: package 'car' was built under R version 4.1.2
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.1.2
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(Rmisc)
## Warning: package 'Rmisc' was built under R version 4.1.2
## Loading required package: lattice
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(caret)
## Warning: package 'caret' was built under R version 4.1.2
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
## The following object is masked from 'package:purrr':
##
## lift
library(tableone)
## Warning: package 'tableone' was built under R version 4.1.2
library(odds.n.ends)
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(Deducer)
## Loading required package: JGR
## Warning: package 'JGR' was built under R version 4.1.2
## Loading required package: rJava
## Loading required package: JavaGD
##
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.1.2
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Note Non-JGR console detected:
## Deducer is best used from within JGR (http://jgr.markushelbig.org/).
## To Bring up GUI dialogs, type deducer().
library(broom)
## Warning: package 'broom' was built under R version 4.1.2
#DEMORAPHIC, EXAMINATION, LAB, QUESTIONNAIRE DATA
# Research aim- We aimed to quantify the risk of diabetes and determine whether a relationship is present between physical activity (PA)levels and diabetes.
#Population- USA Non-institutionalized citizens >= 20 years old, Outcome-Diabetes, exposure- PA, Time- 2017 - 2018
#Covariates- gender, age, race, educational level, BMI, marital status
#DOWNLOAD NHANES DATA DIRECTLY TO R
#Explore the NHANES DATA 2018
kable(head(nhanesTables('DEMO', 2018)))
| DEMO_J |
Demographic Variables and Sample Weights |
kable(head(nhanesTables('EXAM', 2018)))
| BPX_J |
Blood Pressure |
| BMX_J |
Body Measures |
| OHXDEN_J |
Oral Health - Dentition |
| OHXREF_J |
Oral Health - Recommendation of Care |
| DXXFEM_J |
Dual-Energy X-ray Absorptiometry - Femur |
| DXX_J |
Dual-Energy X-ray Absorptiometry - Whole Body |
kable(head(nhanesTables('Q', 2018)))
| BPQ_J |
Blood Pressure & Cholesterol |
| HUQ_J |
Hospital Utilization & Access to Care |
| HEQ_J |
Hepatitis |
| CDQ_J |
Cardiovascular Health |
| PAQY_J |
Physical Activity - Youth |
| PAQ_J |
Physical Activity |
#further explore NHANES 2018
kable(head(nhanesTableVars(data_group = 'DEMO', nh_table = 'DEMO_J', namesonly = FALSE)))
| AIALANGA |
Language of the MEC ACASI Interview Instrument |
| DMDBORN4 |
In what country {were you/was SP} born? |
| DMDCITZN |
{Are you/Is SP} a citizen of the United States?
[Information about citizenship is being collected by |
| DMDEDUC2 |
What is the highest grade or level of school {you
have/SP has} completed or the highest degree {you |
| DMDEDUC3 |
What is the highest grade or level of school {you
have/SP has} completed or the highest degree {you |
| DMDFMSIZ |
Total number of people in the Family |
kable(head(nhanesTableVars(data_group = 'EXAM', nh_table = 'BMX_J', namesonly = FALSE)))
| BMDSTATS |
Body Measures Component status Code |
| BMIARMC |
Arm Circumference Comment |
| BMIARML |
Upper Arm Length Comment |
| BMIHEAD |
Head Circumference Comment |
| BMIHIP |
Hip Circumference Comment |
| BMIHT |
Standing Height Comment |
kable(head(nhanesTableVars(data_group = 'LAB', nh_table = 'GHB_J', namesonly = FALSE)))
| LBXGH |
Glycohemoglobin (%) |
| SEQN |
Respondent sequence number. |
kable(head(nhanesTableVars(data_group = 'Q', nh_table = 'PAQ_J', namesonly = FALSE)))
| PAD615 |
How much time {do you/does SP} spend doing
vigorous-intensity activities at work on a typical day? |
| PAD630 |
How much time {do you/does SP} spend doing
moderate-intensity activities at work on a typical day? |
| PAD645 |
How much time {do you/does SP} spend walking or
bicycling for travel on a typical day? |
| PAD660 |
How much time {do you/does SP} spend doing
vigorous-intensity sports, fitness or recreational activi |
| PAD675 |
How much time {do you/does SP} spend doing
moderate-intensity sports, fitness or recreational activi |
| PAD680 |
The following question is about sitting at school, at
home, getting to and from places, or with frie |
#Download data 2017 - 2018 demographic data (id, age, gender, educational level, race)
demo_2018<-nhanes('DEMO_J')
names(demo_2018)
## [1] "SEQN" "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
## [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM" "DMQMILIZ" "DMQADFC"
## [13] "DMDBORN4" "DMDCITZN" "DMDYRSUS" "DMDEDUC3" "DMDEDUC2" "DMDMARTL"
## [19] "RIDEXPRG" "SIALANG" "SIAPROXY" "SIAINTRP" "FIALANG" "FIAPROXY"
## [25] "FIAINTRP" "MIALANG" "MIAPROXY" "MIAINTRP" "AIALANGA" "DMDHHSIZ"
## [31] "DMDFMSIZ" "DMDHHSZA" "DMDHHSZB" "DMDHHSZE" "DMDHRGND" "DMDHRAGZ"
## [37] "DMDHREDZ" "DMDHRMAZ" "DMDHSEDZ" "WTINT2YR" "WTMEC2YR" "SDMVPSU"
## [43] "SDMVSTRA" "INDHHIN2" "INDFMIN2" "INDFMPIR"
demo1<- demo_2018[c("SEQN", "RIAGENDR", "RIDAGEYR", "DMDMARTL", "DMDEDUC2", "RIDRETH3")]
demo_vars<- names(demo1)
demo_2<-nhanesTranslate('DEMO_J', demo_vars, data = demo1)
## Warning in FUN(X[[i]], ...): No translation table is available for SEQN
## Translated columns: RIAGENDR DMDMARTL DMDEDUC2 RIDRETH3
kable(head(demo_2, n=5))
| 93703 |
Female |
2 |
NA |
NA |
Non-Hispanic Asian |
| 93704 |
Male |
2 |
NA |
NA |
Non-Hispanic White |
| 93705 |
Female |
66 |
Divorced |
9-11th grade (Includes 12th grad |
Non-Hispanic Black |
| 93706 |
Male |
18 |
NA |
NA |
Non-Hispanic Asian |
| 93707 |
Male |
13 |
NA |
NA |
Other Race - Including Multi-Rac |
#Download data diabetes data (id, hba1c)
#labs-hbaic
db_lab_2018<- nhanes('GHB_J')
names(db_lab_2018)
## [1] "SEQN" "LBXGH"
hbaic_lab<- db_lab_2018[c("SEQN","LBXGH")]
hbaic_lab_vars<- names(hbaic_lab)
hbaic_lab_2<-nhanesTranslate('GHB_J', hbaic_lab_vars, data = hbaic_lab)
## Warning in FUN(X[[i]], ...): No translation table is available for SEQN
## Warning in nhanesTranslate("GHB_J", hbaic_lab_vars, data = hbaic_lab): No
## columns were translated
kable(head(hbaic_lab_2, n=5))
| 93705 |
6.2 |
| 93706 |
5.2 |
| 93707 |
5.6 |
| 93708 |
6.2 |
| 93709 |
6.3 |
#Download data PA
db_PA_2018<-nhanes('PAQ_J')
names(db_PA_2018)
## [1] "SEQN" "PAQ605" "PAQ610" "PAD615" "PAQ620" "PAQ625" "PAD630" "PAQ635"
## [9] "PAQ640" "PAD645" "PAQ650" "PAQ655" "PAD660" "PAQ665" "PAQ670" "PAD675"
## [17] "PAD680"
db_PA_qs<- db_PA_2018[c("SEQN","PAD615","PAD630", "PAD645", "PAD660", "PAD675","PAQ610", "PAQ625", "PAQ640", "PAQ655", "PAQ670")]
db_PA_qs_vars<- names(db_PA_qs)
have_PA_qs_2<-nhanesTranslate('PAQ_J', db_PA_qs_vars, data = db_PA_qs)
## Warning in FUN(X[[i]], ...): No translation table is available for SEQN
## Warning in nhanesTranslate("PAQ_J", db_PA_qs_vars, data = db_PA_qs): No columns
## were translated
kable(head(have_PA_qs_2))
| 93705 |
NA |
NA |
NA |
NA |
60 |
NA |
NA |
NA |
NA |
2 |
| 93706 |
NA |
NA |
45 |
NA |
30 |
NA |
NA |
5 |
NA |
2 |
| 93708 |
NA |
NA |
NA |
NA |
30 |
NA |
NA |
NA |
NA |
5 |
| 93709 |
NA |
180 |
NA |
NA |
NA |
NA |
2 |
NA |
NA |
NA |
| 93711 |
NA |
NA |
60 |
60 |
30 |
NA |
NA |
5 |
4 |
2 |
| 93712 |
420 |
180 |
NA |
45 |
90 |
6 |
6 |
NA |
4 |
3 |
# multiply the METs by the CDC recommended factors
have_PA_qs_2$PAQ610_score <- have_PA_qs_2$PAQ610 *8
have_PA_qs_2$PAQ625_score <- have_PA_qs_2$PAQ625 *4
have_PA_qs_2$PAQ640_score <- have_PA_qs_2$PAQ640 *4
have_PA_qs_2$PAQ655_score <- have_PA_qs_2$PAQ655 *8
have_PA_qs_2$PAQ670_score <- have_PA_qs_2$PAQ670 *8
#PA duration in minute per day
have_PA_qs_2$minutes<-rowSums(cbind(have_PA_qs_2$PAQ610_score , have_PA_qs_2$PAQ625_score ,have_PA_qs_2$PAQ640_score , have_PA_qs_2$PAQ655_score , have_PA_qs_2$PAQ670_score), na.rm=TRUE)
#PA duration in days per week
have_PA_qs_2$days<-rowSums(cbind(have_PA_qs_2$PAD615 , have_PA_qs_2$PAD630 ,have_PA_qs_2$PAD645 , have_PA_qs_2$PAD660 , have_PA_qs_2$PAD675), na.rm=TRUE)
have_PA_qs_2<-have_PA_qs_2 %>%
mutate(met_1 = PAQ610_score * PAD615,
met_2 = PAQ625_score * PAD630,
met_3 = PAQ640_score * PAD645,
met_4 = PAQ655_score * PAD660,
met_5 = PAQ670_score * PAD675)
#cbind- merge PA variables importanat for our analysis
have_PA_qs_2$combined_MET_Scores<- rowSums(cbind(have_PA_qs_2$met_1 , have_PA_qs_2$met_2 ,have_PA_qs_2$met_3 , have_PA_qs_2$met_4 , have_PA_qs_2$met_5), na.rm=TRUE)
have_PA_qs_2$combined_MET_Scores_hrs <- have_PA_qs_2$combined_MET_Scores /60
head(have_PA_qs_2)
## SEQN PAD615 PAD630 PAD645 PAD660 PAD675 PAQ610 PAQ625 PAQ640 PAQ655 PAQ670
## 1 93705 NA NA NA NA 60 NA NA NA NA 2
## 2 93706 NA NA 45 NA 30 NA NA 5 NA 2
## 3 93708 NA NA NA NA 30 NA NA NA NA 5
## 4 93709 NA 180 NA NA NA NA 2 NA NA NA
## 5 93711 NA NA 60 60 30 NA NA 5 4 2
## 6 93712 420 180 NA 45 90 6 6 NA 4 3
## PAQ610_score PAQ625_score PAQ640_score PAQ655_score PAQ670_score minutes days
## 1 NA NA NA NA 16 16 60
## 2 NA NA 20 NA 16 36 75
## 3 NA NA NA NA 40 40 30
## 4 NA 8 NA NA NA 8 180
## 5 NA NA 20 32 16 68 150
## 6 48 24 NA 32 24 128 735
## met_1 met_2 met_3 met_4 met_5 combined_MET_Scores combined_MET_Scores_hrs
## 1 NA NA NA NA 960 960 16
## 2 NA NA 900 NA 480 1380 23
## 3 NA NA NA NA 1200 1200 20
## 4 NA 1440 NA NA NA 1440 24
## 5 NA NA 1200 1920 480 3600 60
## 6 20160 4320 NA 1440 2160 28080 468
PA_Level_MET_Score <- have_PA_qs_2 %>%
dplyr::select("SEQN", "combined_MET_Scores_hrs")
kable(head(PA_Level_MET_Score))
| 93705 |
16 |
| 93706 |
23 |
| 93708 |
20 |
| 93709 |
24 |
| 93711 |
60 |
| 93712 |
468 |
#Download data BMI
db_BMI_2018<-nhanes('BMX_J')
db_BMI_vars<-names(db_BMI_2018)
db_BMI_2018_qs<- db_BMI_2018[c("SEQN","BMXHT","BMXWT")]
db_BMI_2018_2<-nhanesTranslate('BMX_J', db_BMI_vars, data = db_BMI_2018_qs)
## Warning in FUN(X[[i]], ...): No translation table is available for SEQN
## Warning in nhanesTranslate("BMX_J", db_BMI_vars, data = db_BMI_2018_qs): No
## columns were translated
kable(head(db_BMI_2018_2))
| 93703 |
88.6 |
13.7 |
| 93704 |
94.2 |
13.9 |
| 93705 |
158.3 |
79.5 |
| 93706 |
175.7 |
66.3 |
| 93707 |
158.4 |
45.4 |
| 93708 |
150.2 |
53.5 |
#merge all datasets
stat_project_analytic_data<- join_all(list(demo_2, hbaic_lab_2, PA_Level_MET_Score, db_BMI_2018_2), by = "SEQN", type = "full")
kable(head(stat_project_analytic_data))
| 93703 |
Female |
2 |
NA |
NA |
Non-Hispanic Asian |
NA |
NA |
88.6 |
13.7 |
| 93704 |
Male |
2 |
NA |
NA |
Non-Hispanic White |
NA |
NA |
94.2 |
13.9 |
| 93705 |
Female |
66 |
Divorced |
9-11th grade (Includes 12th grad |
Non-Hispanic Black |
6.2 |
16 |
158.3 |
79.5 |
| 93706 |
Male |
18 |
NA |
NA |
Non-Hispanic Asian |
5.2 |
23 |
175.7 |
66.3 |
| 93707 |
Male |
13 |
NA |
NA |
Other Race - Including Multi-Rac |
5.6 |
NA |
158.4 |
45.4 |
| 93708 |
Female |
66 |
Married |
Less than 9th grade |
Non-Hispanic Asian |
6.2 |
20 |
150.2 |
53.5 |
dim(stat_project_analytic_data)
## [1] 9254 10
#Data cleaning
#filter age 20-80
stat_project_analytic_data_cleaned <-stat_project_analytic_data %>%
dplyr::filter(RIDAGEYR >= 20 & RIDAGEYR <= 80)
#mutate to rename variables and create BMI variable
stat_project_analytic_data_cleaned <-stat_project_analytic_data %>%
dplyr::filter(RIDAGEYR >= 20 & RIDAGEYR<= 80)%>%
mutate(DMDMARTL = recode_factor(DMDMARTL, "Married" = "Married",
"Widowed" = "Other",
"Refused" = "NA",
"Divorced" = "Other",
"Separated"= "Other",
"Living with partner" = "Other",
"Never married" = "Unmarried"),
DMDEDUC2 = recode_factor(DMDEDUC2, "Don't Know" = "NA",
"Refused" = "NA",
"9-11th grade (Includes 12th grad" = "9-11th grade ",
"High school graduate/GED or equi" = "College graduate or higher",
"Some college or AA degree"= "College graduate or higher",
"College graduate or above" = "College graduate or higher"),
BMXHT = BMXHT/100,
BMI = BMXWT/BMXHT^2,
age.cat = cut(x = RIDAGEYR,
breaks = c(20, 44, 64, 79, Inf),
labels = c("20-44", "45-64", "65-79", ">=80")),
BMI.cat = cut(x = BMI,
breaks = c(0, 18.5, 25, 30, +Inf),
labels = c("<18.5", "18.5-24.9", "25-29.9", ">=30"))
)
#Create hbaic category a
stat_project_analytic_data_cleaned$LBXGH<- ifelse(stat_project_analytic_data_cleaned$LBXGH < 6.5, 0, 1)
stat_project_analytic_data_cleaned$LBXGH<- as.factor(stat_project_analytic_data_cleaned$LBXGH)
#create diabetes status category
stat_project_analytic_data_cleaned$db_status[stat_project_analytic_data_cleaned$LBXGH == "0"]<- "Non_diabetic"
stat_project_analytic_data_cleaned$db_status[stat_project_analytic_data_cleaned$LBXGH == "1"]<- "Diabetic"
stat_project_analytic_data_cleaned$db_status<-as.factor(stat_project_analytic_data_cleaned$db_status)
# EDA
table(stat_project_analytic_data_cleaned$RIAGENDR)
##
## Male Female
## 2702 2867
table(stat_project_analytic_data_cleaned$age.cat)
##
## 20-44 45-64 65-79 >=80
## 2019 1974 1073 427
table(stat_project_analytic_data_cleaned$DMDMARTL)
##
## Married Other NA Unmarried
## 2737 1820 6 1006
table(stat_project_analytic_data_cleaned$DMDEDUC2)
##
## NA 9-11th grade
## 13 638
## College graduate or higher Less than 9th grade
## 4439 479
table(stat_project_analytic_data_cleaned$LBXGH)
##
## 0 1
## 4273 745
table(stat_project_analytic_data_cleaned$BMI.cat)
##
## <18.5 18.5-24.9 25-29.9 >=30
## 83 1264 1666 2162
table(stat_project_analytic_data_cleaned$RIDRETH3)
##
## Mexican American Other Hispanic
## 735 517
## Non-Hispanic White Non-Hispanic Black
## 1935 1298
## Non-Hispanic Asian Other Race - Including Multi-Rac
## 811 273
summary(stat_project_analytic_data_cleaned)
## SEQN RIAGENDR RIDAGEYR DMDMARTL
## Min. : 93705 Male :2702 Min. :20.0 Married :2737
## 1st Qu.: 95969 Female:2867 1st Qu.:36.0 Other :1820
## Median : 98274 Median :53.0 NA : 6
## Mean : 98288 Mean :51.5 Unmarried:1006
## 3rd Qu.:100621 3rd Qu.:66.0
## Max. :102956 Max. :80.0
##
## DMDEDUC2 RIDRETH3
## NA : 13 Mexican American : 735
## 9-11th grade : 638 Other Hispanic : 517
## College graduate or higher:4439 Non-Hispanic White :1935
## Less than 9th grade : 479 Non-Hispanic Black :1298
## Non-Hispanic Asian : 811
## Other Race - Including Multi-Rac: 273
##
## LBXGH combined_MET_Scores_hrs BMXHT BMXWT
## 0 :4273 Min. : 0.00 Min. :1.383 Min. : 32.60
## 1 : 745 1st Qu.: 0.00 1st Qu.:1.588 1st Qu.: 67.00
## NA's: 551 Median : 24.00 Median :1.658 Median : 79.10
## Mean : 92.90 Mean :1.663 Mean : 82.84
## 3rd Qu.: 89.33 3rd Qu.:1.736 3rd Qu.: 94.80
## Max. :9352.40 Max. :1.977 Max. :242.60
## NA's :384 NA's :384
## BMI age.cat BMI.cat db_status
## Min. :14.20 20-44:2019 <18.5 : 83 Diabetic : 745
## 1st Qu.:24.79 45-64:1974 18.5-24.9:1264 Non_diabetic:4273
## Median :28.64 65-79:1073 25-29.9 :1666 NA's : 551
## Mean :29.85 >=80 : 427 >=30 :2162
## 3rd Qu.:33.56 NA's : 76 NA's : 394
## Max. :86.16
## NA's :394
#EDA
par(mfrow = c(2, 3))
hist(stat_project_analytic_data_cleaned$RIDAGEYR)#AGE
hist(stat_project_analytic_data_cleaned$BMI)#BMI
plot(stat_project_analytic_data_cleaned$RIDRETH3)#RACE
plot(stat_project_analytic_data_cleaned$RIAGENDR)#Gender
plot(stat_project_analytic_data_cleaned$LBXGH)#DM status

#select diabetes status, PA levels, and covariates that have been shown to be associated with increased prevalence of diabetes (age, gender, race, bmi).We had pre-determined covariates, so we did not perform correlation analysis.
NHANES_DB_PA_DATA_ANALYSIS<- stat_project_analytic_data_cleaned%>%
dplyr::select(SEQN, RIAGENDR, age.cat, DMDMARTL, DMDEDUC2, RIDRETH3, combined_MET_Scores_hrs, BMI.cat, db_status)
kable(head(NHANES_DB_PA_DATA_ANALYSIS))
| 93705 |
Female |
65-79 |
Other |
9-11th grade |
Non-Hispanic Black |
16 |
>=30 |
Non_diabetic |
| 93708 |
Female |
65-79 |
Married |
Less than 9th grade |
Non-Hispanic Asian |
20 |
18.5-24.9 |
Non_diabetic |
| 93709 |
Female |
65-79 |
Other |
College graduate or higher |
Non-Hispanic Black |
24 |
>=30 |
Non_diabetic |
| 93711 |
Male |
45-64 |
Married |
College graduate or higher |
Non-Hispanic Asian |
60 |
18.5-24.9 |
Non_diabetic |
| 93713 |
Male |
65-79 |
Other |
College graduate or higher |
Non-Hispanic White |
30 |
18.5-24.9 |
Non_diabetic |
| 93714 |
Female |
45-64 |
Married |
College graduate or higher |
Non-Hispanic Black |
0 |
>=30 |
Diabetic |
#RENAME VARIABLES
NHANES_DB_PA_DATA_ANALYSIS$Age<- NHANES_DB_PA_DATA_ANALYSIS$age.cat
NHANES_DB_PA_DATA_ANALYSIS$marital_status<- NHANES_DB_PA_DATA_ANALYSIS$DMDMARTL
NHANES_DB_PA_DATA_ANALYSIS$education<- NHANES_DB_PA_DATA_ANALYSIS$DMDEDUC2
NHANES_DB_PA_DATA_ANALYSIS$Gender<-NHANES_DB_PA_DATA_ANALYSIS$RIAGENDR
NHANES_DB_PA_DATA_ANALYSIS$race <- NHANES_DB_PA_DATA_ANALYSIS$RIDRETH3
NHANES_DB_PA_DATA_ANALYSIS_2<- NHANES_DB_PA_DATA_ANALYSIS%>%
dplyr::select(SEQN,combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)
kable(head(NHANES_DB_PA_DATA_ANALYSIS_2))
| 93705 |
16 |
>=30 |
Non_diabetic |
65-79 |
Female |
Other |
9-11th grade |
Non-Hispanic Black |
| 93708 |
20 |
18.5-24.9 |
Non_diabetic |
65-79 |
Female |
Married |
Less than 9th grade |
Non-Hispanic Asian |
| 93709 |
24 |
>=30 |
Non_diabetic |
65-79 |
Female |
Other |
College graduate or higher |
Non-Hispanic Black |
| 93711 |
60 |
18.5-24.9 |
Non_diabetic |
45-64 |
Male |
Married |
College graduate or higher |
Non-Hispanic Asian |
| 93713 |
30 |
18.5-24.9 |
Non_diabetic |
65-79 |
Male |
Other |
College graduate or higher |
Non-Hispanic White |
| 93714 |
0 |
>=30 |
Diabetic |
45-64 |
Female |
Married |
College graduate or higher |
Non-Hispanic Black |
#Remove NA's
complete_NHANES_DB_PA_DATA_ANALYSIS_3<-na.omit(NHANES_DB_PA_DATA_ANALYSIS_2)
#subset
#remove rows where column matital status & education is equal to "NA"
complete_NHANES_DB_PA_DATA_ANALYSIS_3 <- subset(complete_NHANES_DB_PA_DATA_ANALYSIS_3, complete_NHANES_DB_PA_DATA_ANALYSIS_3$education != "NA")
complete_NHANES_DB_PA_DATA_ANALYSIS_3 <- subset(complete_NHANES_DB_PA_DATA_ANALYSIS_3, complete_NHANES_DB_PA_DATA_ANALYSIS_3$marital_status != "NA")
summary(complete_NHANES_DB_PA_DATA_ANALYSIS_3)
## SEQN combined_MET_Scores_hrs BMI.cat db_status
## Min. : 93705 Min. : 0.00 <18.5 : 69 Diabetic : 722
## 1st Qu.: 95945 1st Qu.: 0.00 18.5-24.9:1166 Non_diabetic:4139
## Median : 98257 Median : 26.00 25-29.9 :1571
## Mean : 98275 Mean : 93.14 >=30 :2055
## 3rd Qu.:100590 3rd Qu.: 90.00
## Max. :102956 Max. :9352.40
## Age Gender marital_status education
## 20-44:1771 Male :2332 Married :2483 NA : 0
## 45-64:1808 Female:2529 Other :1560 9-11th grade : 547
## 65-79: 946 NA : 0 College graduate or higher:3900
## >=80 : 336 Unmarried: 818 Less than 9th grade : 414
##
##
## race
## Mexican American : 651
## Other Hispanic : 460
## Non-Hispanic White :1700
## Non-Hispanic Black :1113
## Non-Hispanic Asian : 693
## Other Race - Including Multi-Rac: 244
dim(complete_NHANES_DB_PA_DATA_ANALYSIS_3)
## [1] 4861 9
kable(head(complete_NHANES_DB_PA_DATA_ANALYSIS_3))
| 93705 |
16 |
>=30 |
Non_diabetic |
65-79 |
Female |
Other |
9-11th grade |
Non-Hispanic Black |
| 93708 |
20 |
18.5-24.9 |
Non_diabetic |
65-79 |
Female |
Married |
Less than 9th grade |
Non-Hispanic Asian |
| 93709 |
24 |
>=30 |
Non_diabetic |
65-79 |
Female |
Other |
College graduate or higher |
Non-Hispanic Black |
| 93711 |
60 |
18.5-24.9 |
Non_diabetic |
45-64 |
Male |
Married |
College graduate or higher |
Non-Hispanic Asian |
| 93713 |
30 |
18.5-24.9 |
Non_diabetic |
65-79 |
Male |
Other |
College graduate or higher |
Non-Hispanic White |
| 93714 |
0 |
>=30 |
Diabetic |
45-64 |
Female |
Married |
College graduate or higher |
Non-Hispanic Black |
#write data as a table
write.table(complete_NHANES_DB_PA_DATA_ANALYSIS_3, file = 'complete_NHANES_DB_PA_DATA_ANALYSIS_33.txt', quote = FALSE,sep='\t', col.names = NA, row.names = TRUE)
#Load cleaned data
NHANES_COMPLETE<- read_excel("complete_NHANES_DB_PA_DATA_ANALYSIS_33.xlsx")
typeof(NHANES_COMPLETE)
## [1] "list"
NHANES_COMPLETE<- as.data.frame(NHANES_COMPLETE)
#EDA
summary(NHANES_COMPLETE)
## SEQN combined_MET_Scores_hrs BMI.cat db_status
## Min. : 93705 Min. : 0.00 Length:4861 Length:4861
## 1st Qu.: 95945 1st Qu.: 0.00 Class :character Class :character
## Median : 98257 Median : 26.00 Mode :character Mode :character
## Mean : 98275 Mean : 93.14
## 3rd Qu.:100590 3rd Qu.: 90.00
## Max. :102956 Max. :9352.40
## Age Gender marital_status education
## Length:4861 Length:4861 Length:4861 Length:4861
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## race
## Length:4861
## Class :character
## Mode :character
##
##
##
str(NHANES_COMPLETE)
## 'data.frame': 4861 obs. of 9 variables:
## $ SEQN : num 93705 93708 93709 93711 93713 ...
## $ combined_MET_Scores_hrs: num 16 20 24 60 30 ...
## $ BMI.cat : chr ">=30" "18.5-24.9" ">=30" "18.5-24.9" ...
## $ db_status : chr "Non_diabetic" "Non_diabetic" "Non_diabetic" "Non_diabetic" ...
## $ Age : chr "65-79" "65-79" "65-79" "45-64" ...
## $ Gender : chr "Female" "Female" "Female" "Male" ...
## $ marital_status : chr "Other" "Married" "Other" "Married" ...
## $ education : chr "9-11th grade" "Less than 9th grade" "College graduate or higher" "College graduate or higher" ...
## $ race : chr "Non-Hispanic Black" "Non-Hispanic Asian" "Non-Hispanic Black" "Non-Hispanic Asian" ...
#convert to variables to factor
NHANES_COMPLETE$BMI.cat <- as.factor(NHANES_COMPLETE$BMI.cat)
NHANES_COMPLETE$db_status <- as.factor(NHANES_COMPLETE$db_status)
NHANES_COMPLETE$Age <- as.factor(NHANES_COMPLETE$Age)
NHANES_COMPLETE$Gender <- as.factor(NHANES_COMPLETE$Gender)
NHANES_COMPLETE$marital_status <- as.factor(NHANES_COMPLETE$marital_status)
NHANES_COMPLETE$race <- as.factor(NHANES_COMPLETE$race)
NHANES_COMPLETE$education <- as.factor(NHANES_COMPLETE$education)
str(NHANES_COMPLETE)
## 'data.frame': 4861 obs. of 9 variables:
## $ SEQN : num 93705 93708 93709 93711 93713 ...
## $ combined_MET_Scores_hrs: num 16 20 24 60 30 ...
## $ BMI.cat : Factor w/ 4 levels "<18.5",">=30",..: 2 3 2 3 3 2 3 2 3 3 ...
## $ db_status : Factor w/ 2 levels "Diabetic","Non_diabetic": 2 2 2 2 2 1 2 2 2 2 ...
## $ Age : Factor w/ 4 levels ">=80","20-44",..: 4 4 4 3 4 3 4 3 2 3 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 2 ...
## $ marital_status : Factor w/ 3 levels "Married","Other",..: 2 1 2 1 2 1 1 1 3 3 ...
## $ education : Factor w/ 3 levels "9-11th grade",..: 1 3 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 6 levels "Mexican American",..: 3 2 3 2 4 3 6 2 4 3 ...
#rename variables again
NHANES_COMPLETE <-NHANES_COMPLETE%>%
dplyr::select(combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)%>%
mutate(race = recode_factor(race, "Mexican American" = "Hispanic",
"Other Hispanic" = "Hispanic",
"Other Race - Including Multi-Rac" = "Other/Multiracial",
"Non-Hispanic Asian" = "Asian",
"Non-Hispanic Black" = "Black",
"Non-Hispanic White" = "White"))
reorder_size <- function(x) {
factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}
#create Mets score factor
NHANES_COMPLETE<-NHANES_COMPLETE%>%
mutate(MET.cat = cut(x = combined_MET_Scores_hrs,
breaks = c(-Inf,0, 1000, +Inf),
labels = c("No_physical_activity", "mod_PA_level", "High_PA_level"))
)
summary(NHANES_COMPLETE)
## combined_MET_Scores_hrs BMI.cat db_status Age
## Min. : 0.00 <18.5 : 69 Diabetic : 722 >=80 : 336
## 1st Qu.: 0.00 >=30 :2055 Non_diabetic:4139 20-44:1771
## Median : 26.00 18.5-24.9:1166 45-64:1808
## Mean : 93.14 25-29.9 :1571 65-79: 946
## 3rd Qu.: 90.00
## Max. :9352.40
## Gender marital_status education
## Female:2529 Married :2483 9-11th grade : 547
## Male :2332 Other :1560 College graduate or higher:3900
## Unmarried: 818 Less than 9th grade : 414
##
##
##
## race MET.cat
## Hispanic :1111 No_physical_activity:1259
## Other/Multiracial: 244 mod_PA_level :3583
## Asian : 693 High_PA_level : 19
## Black :1113
## White :1700
##
# More EDA
ggplot(NHANES_COMPLETE, aes(x = reorder_size(BMI.cat)))+
geom_bar(color = "blue", fill="blue")+
facet_grid(~ db_status)+
theme_minimal()+
xlab("BMI Category")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(Age)))+
geom_bar(color = "blue", fill="blue")+
facet_grid(~ db_status)+
theme_minimal()+
xlab("Age Category")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(db_status)))+
geom_bar(color = "blue", fill="blue")+
theme_minimal()+
xlab("DM Status")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(MET.cat)))+
geom_bar(color = "blue", fill="blue")+
facet_grid(~ db_status)+
theme_minimal()+
xlab("METscore Category")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(marital_status))+
geom_bar(color = "blue", fill="blue")+
facet_grid(~ db_status)+
theme_minimal()+
xlab("Marital status")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(race)))+
geom_bar(color = "blue", fill="blue")+
facet_grid(~ db_status)+
theme_minimal()+
xlab("Race")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(Gender)))+
geom_bar(color = "blue", fill="blue")+
facet_grid(~ db_status)+
theme_minimal()+
xlab("Gender")+
scale_x_discrete( guide = guide_axis(angle = 70))

#normality assumption
##shapiro-wilks and qqplots
#shapiro
shapiro.test(NHANES_COMPLETE$combined_MET_Scores_hrs)
##
## Shapiro-Wilk normality test
##
## data: NHANES_COMPLETE$combined_MET_Scores_hrs
## W = 0.19001, p-value < 2.2e-16
#qqplots
ggqqplot(NHANES_COMPLETE$combined_MET_Scores_hrs)

#other variables are categorical so normality testing was not performed. the METs scores were not normally distributed as seen by the shapiro-wilks test (p , 0.05) and qq-plots results
#Ho= there is no significant association between PA level and diabetes status
#Ha= there is a significant association between PA level and diabetes status
# chi-square for categorical variables
chisq.test(NHANES_COMPLETE$BMI.cat, NHANES_COMPLETE$db_status)
##
## Pearson's Chi-squared test
##
## data: NHANES_COMPLETE$BMI.cat and NHANES_COMPLETE$db_status
## X-squared = 99.689, df = 3, p-value < 2.2e-16
chisq.test(NHANES_COMPLETE$Age, NHANES_COMPLETE$db_status)
##
## Pearson's Chi-squared test
##
## data: NHANES_COMPLETE$Age and NHANES_COMPLETE$db_status
## X-squared = 317.1, df = 3, p-value < 2.2e-16
chisq.test(NHANES_COMPLETE$Gender, NHANES_COMPLETE$db_status)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: NHANES_COMPLETE$Gender and NHANES_COMPLETE$db_status
## X-squared = 4.797, df = 1, p-value = 0.02851
chisq.test(NHANES_COMPLETE$marital_status, NHANES_COMPLETE$db_status)
##
## Pearson's Chi-squared test
##
## data: NHANES_COMPLETE$marital_status and NHANES_COMPLETE$db_status
## X-squared = 33.638, df = 2, p-value = 4.96e-08
chisq.test(NHANES_COMPLETE$education, NHANES_COMPLETE$db_status)
##
## Pearson's Chi-squared test
##
## data: NHANES_COMPLETE$education and NHANES_COMPLETE$db_status
## X-squared = 42.801, df = 2, p-value = 5.079e-10
chisq.test(NHANES_COMPLETE$race, NHANES_COMPLETE$db_status)
##
## Pearson's Chi-squared test
##
## data: NHANES_COMPLETE$race and NHANES_COMPLETE$db_status
## X-squared = 17.518, df = 4, p-value = 0.001533
chisq.test(NHANES_COMPLETE$MET.cat, NHANES_COMPLETE$db_status)
## Warning in chisq.test(NHANES_COMPLETE$MET.cat, NHANES_COMPLETE$db_status): Chi-
## squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: NHANES_COMPLETE$MET.cat and NHANES_COMPLETE$db_status
## X-squared = 41.886, df = 2, p-value = 8.028e-10
#T-test
t.test( NHANES_COMPLETE$combined_MET_Scores_hrs ~ NHANES_COMPLETE$db_status)
##
## Welch Two Sample t-test
##
## data: NHANES_COMPLETE$combined_MET_Scores_hrs by NHANES_COMPLETE$db_status
## t = -2.384, df = 1108.3, p-value = 0.01729
## alternative hypothesis: true difference in means between group Diabetic and group Non_diabetic is not equal to 0
## 95 percent confidence interval:
## -53.221530 -5.166547
## sample estimates:
## mean in group Diabetic mean in group Non_diabetic
## 68.27802 97.47206
#create table one. Categorical Chisquare, continous t-test
CreateTableOne(data= NHANES_COMPLETE, strata = "db_status", vars = c("Age", "Gender", "BMI.cat","marital_status", "education", "race", "MET.cat", "combined_MET_Scores_hrs" ))
## Stratified by db_status
## Diabetic Non_diabetic p test
## n 722 4139
## Age (%) <0.001
## >=80 69 ( 9.6) 267 ( 6.5)
## 20-44 60 ( 8.3) 1711 (41.3)
## 45-64 343 (47.5) 1465 (35.4)
## 65-79 250 (34.6) 696 (16.8)
## Gender = Male (%) 374 (51.8) 1958 (47.3) 0.029
## BMI.cat (%) <0.001
## <18.5 1 ( 0.1) 68 ( 1.6)
## >=30 411 (56.9) 1644 (39.7)
## 18.5-24.9 91 (12.6) 1075 (26.0)
## 25-29.9 219 (30.3) 1352 (32.7)
## marital_status (%) <0.001
## Married 415 (57.5) 2068 (50.0)
## Other 238 (33.0) 1322 (31.9)
## Unmarried 69 ( 9.6) 749 (18.1)
## education (%) <0.001
## 9-11th grade 90 (12.5) 457 (11.0)
## College graduate or higher 527 (73.0) 3373 (81.5)
## Less than 9th grade 105 (14.5) 309 ( 7.5)
## race (%) 0.002
## Hispanic 185 (25.6) 926 (22.4)
## Other/Multiracial 38 ( 5.3) 206 ( 5.0)
## Asian 108 (15.0) 585 (14.1)
## Black 187 (25.9) 926 (22.4)
## White 204 (28.3) 1496 (36.1)
## MET.cat (%) <0.001
## No_physical_activity 255 (35.3) 1004 (24.3)
## mod_PA_level 462 (64.0) 3121 (75.4)
## High_PA_level 5 ( 0.7) 14 ( 0.3)
## combined_MET_Scores_hrs (mean (SD)) 68.28 (294.74) 97.47 (350.26) 0.035
#Model1- Model I only comparing diabetes to physical activity
logit1<- glm(formula= db_status~MET.cat, data= NHANES_COMPLETE, family='binomial')
summary(logit1)
##
## Call:
## glm(formula = db_status ~ MET.cat, family = "binomial", data = NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0240 0.5254 0.5254 0.5254 0.7815
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.37048 0.07013 19.543 < 2e-16 ***
## MET.catmod_PA_level 0.53986 0.08604 6.275 3.5e-10 ***
## MET.catHigh_PA_level -0.34086 0.52569 -0.648 0.517
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4084.7 on 4860 degrees of freedom
## Residual deviance: 4045.1 on 4858 degrees of freedom
## AIC: 4051.1
##
## Number of Fisher Scoring iterations: 4
odds.n.ends(logit1)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 39.552 2 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 4139 722 4861
## 0 0 0 0
## Sum 4139 722 4861
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 85.14709
##
## $`Model sensitivity`
## [1] 1
##
## $`Model specificity`
## [1] 0
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 3.9372549 3.4378852 4.526118
## MET.catmod_PA_level 1.7157668 1.4481933 2.029324
## MET.catHigh_PA_level 0.7111554 0.2693302 2.217103
rocplot(logit1)

#Model2 comparing diabetes to physical activity, adjusted for baseline gender, age, and race
logit2<- glm(formula= db_status~MET.cat+Gender+Age+race, data = NHANES_COMPLETE, family='binomial'(link=logit))
summary(logit2)
##
## Call:
## glm(formula = db_status ~ MET.cat + Gender + Age + race, family = binomial(link = logit),
## data = NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8015 0.2330 0.3644 0.6743 1.0291
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.87961 0.17378 5.062 4.16e-07 ***
## MET.catmod_PA_level 0.31111 0.09122 3.410 0.000649 ***
## MET.catHigh_PA_level -0.38428 0.54959 -0.699 0.484416
## GenderMale -0.17532 0.08475 -2.069 0.038578 *
## Age20-44 2.11473 0.19629 10.773 < 2e-16 ***
## Age45-64 0.25065 0.15583 1.608 0.107731
## Age65-79 -0.23532 0.15840 -1.486 0.137392
## raceOther/Multiracial 0.03573 0.20414 0.175 0.861065
## raceAsian 0.06845 0.13817 0.495 0.620325
## raceBlack 0.09933 0.11913 0.834 0.404395
## raceWhite 0.59870 0.11773 5.085 3.67e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4084.7 on 4860 degrees of freedom
## Residual deviance: 3661.5 on 4850 degrees of freedom
## AIC: 3683.5
##
## Number of Fisher Scoring iterations: 6
odds.n.ends(logit2)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 423.161 10 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 4139 722 4861
## 0 0 0 0
## Sum 4139 722 4861
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 85.14709
##
## $`Model sensitivity`
## [1] 1
##
## $`Model specificity`
## [1] 0
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 2.4099712 1.7216053 3.4045349
## MET.catmod_PA_level 1.3649350 1.1404844 1.6309366
## MET.catHigh_PA_level 0.6809392 0.2431054 2.1986726
## GenderMale 0.8391871 0.7106172 0.9907398
## Age20-44 8.2873875 5.6457383 12.2025714
## Age45-64 1.2848635 0.9418372 1.7362182
## Age65-79 0.7903165 0.5765364 1.0735610
## raceOther/Multiracial 1.0363745 0.7006954 1.5626749
## raceAsian 1.0708435 0.8180847 1.4066951
## raceBlack 1.1044275 0.8743948 1.3951350
## raceWhite 1.8197491 1.4447067 2.2925308
rocplot(logit2)

# Model3 comparing diabetes to physical activity,adjusted for baseline gender, age, race, education level, marital status, BMI
logit3<- glm(formula = db_status~ MET.cat+Gender+Age+race+marital_status+education+BMI.cat, data= NHANES_COMPLETE, family='binomial')
summary(logit3)
##
## Call:
## glm(formula = db_status ~ MET.cat + Gender + Age + race + marital_status +
## education + BMI.cat, family = "binomial", data = NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9305 0.1971 0.3629 0.6521 1.2866
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.06721 1.03457 2.965 0.00303 **
## MET.catmod_PA_level 0.27600 0.09322 2.961 0.00307 **
## MET.catHigh_PA_level -0.48182 0.56247 -0.857 0.39166
## GenderMale -0.19639 0.08777 -2.237 0.02526 *
## Age20-44 2.27662 0.20600 11.052 < 2e-16 ***
## Age45-64 0.44361 0.16216 2.736 0.00623 **
## Age65-79 -0.07388 0.16286 -0.454 0.65010
## raceOther/Multiracial -0.15810 0.21398 -0.739 0.46001
## raceAsian -0.42496 0.15310 -2.776 0.00551 **
## raceBlack -0.07606 0.13380 -0.568 0.56973
## raceWhite 0.42752 0.13148 3.252 0.00115 **
## marital_statusOther 0.15193 0.09788 1.552 0.12060
## marital_statusUnmarried 0.07850 0.15357 0.511 0.60923
## educationCollege graduate or higher 0.19317 0.13465 1.435 0.15141
## educationLess than 9th grade -0.27912 0.17865 -1.562 0.11819
## BMI.cat>=30 -2.77099 1.01469 -2.731 0.00632 **
## BMI.cat18.5-24.9 -1.53522 1.01890 -1.507 0.13188
## BMI.cat25-29.9 -2.11005 1.01570 -2.077 0.03776 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4084.7 on 4860 degrees of freedom
## Residual deviance: 3528.7 on 4843 degrees of freedom
## AIC: 3564.7
##
## Number of Fisher Scoring iterations: 6
odds.n.ends(logit3)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 556.034 17 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 4130 711 4841
## 0 9 11 20
## Sum 4139 722 4861
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 85.18823
##
## $`Model sensitivity`
## [1] 0.9978256
##
## $`Model specificity`
## [1] 0.01523546
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 21.48191720 4.346606896 389.8093991
## MET.catmod_PA_level 1.31784229 1.096823712 1.5808109
## MET.catHigh_PA_level 0.61766069 0.215932043 2.0429546
## GenderMale 0.82169161 0.691652866 0.9758021
## Age20-44 9.74367616 6.515689148 14.6269937
## Age45-64 1.55832799 1.128963096 2.1334311
## Age65-79 0.92878461 0.671942732 1.2732542
## raceOther/Multiracial 0.85376451 0.565642808 1.3109837
## raceAsian 0.65379342 0.484516244 0.8833529
## raceBlack 0.92676089 0.712460769 1.2041144
## raceWhite 1.53344490 1.184367187 1.9835253
## marital_statusOther 1.16408228 0.961587529 1.4114958
## marital_statusUnmarried 1.08166097 0.804233503 1.4694569
## educationCollege graduate or higher 1.21308315 0.927696794 1.5734655
## educationLess than 9th grade 0.75645033 0.532384178 1.0730132
## BMI.cat>=30 0.06260008 0.003516389 0.2906925
## BMI.cat18.5-24.9 0.21540881 0.012051581 1.0143943
## BMI.cat25-29.9 0.12123233 0.006803364 0.5648862
rocplot(logit3)

#females subgroup
female_NHANES_COMPLETE<- NHANES_COMPLETE%>%
filter(Gender == "Female")
female_NHANES_COMPLETE <-female_NHANES_COMPLETE%>%
dplyr::select( combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)%>%
mutate(race = recode_factor(race, "Mexican American" = "Hispanic",
"Other Hispanic" = "Hispanic",
"Other Race - Including Multi-Rac" = "Other/Multiracial",
"Non-Hispanic Asian" = "Asian",
"Non-Hispanic Black" = "Black",
"Non-Hispanic White" = "White"))
reorder_size <- function(x) {
factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}
#create Mets score factor
female_NHANES_COMPLETE<-female_NHANES_COMPLETE%>%
mutate(MET.cat = cut(x = combined_MET_Scores_hrs,
breaks = c(-Inf,0, 1000, +Inf),
labels = c("No_physical_activity", "mod_PA_level", "High_PA_level"))
)
summary(female_NHANES_COMPLETE)
## combined_MET_Scores_hrs BMI.cat db_status Age
## Min. : 0.00 <18.5 : 40 Diabetic : 348 >=80 :172
## 1st Qu.: 0.00 >=30 :1127 Non_diabetic:2181 20-44:961
## Median : 18.67 18.5-24.9: 635 45-64:936
## Mean : 72.40 25-29.9 : 727 65-79:460
## 3rd Qu.: 60.00
## Max. :8047.20
## Gender marital_status education
## Female:2529 Married :1185 9-11th grade : 258
## Male : 0 Other : 940 College graduate or higher:2062
## Unmarried: 404 Less than 9th grade : 209
##
##
##
## race MET.cat
## Hispanic :588 No_physical_activity: 764
## Other/Multiracial:111 mod_PA_level :1755
## Asian :374 High_PA_level : 10
## Black :593
## White :863
##
#Female log regression
#model1
Flogit1<- glm(formula= db_status~MET.cat, data= female_NHANES_COMPLETE, family='binomial')
summary(Flogit1)
##
## Call:
## glm(formula = db_status ~ MET.cat, family = "binomial", data = female_NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0699 0.4997 0.4997 0.4997 0.6681
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.49451 0.09352 15.981 < 2e-16 ***
## MET.catmod_PA_level 0.52298 0.11935 4.382 1.18e-05 ***
## MET.catHigh_PA_level -0.10821 0.79608 -0.136 0.892
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2026.2 on 2528 degrees of freedom
## Residual deviance: 2007.2 on 2526 degrees of freedom
## AIC: 2013.2
##
## Number of Fisher Scoring iterations: 4
odds.n.ends(Flogit1)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 18.957 2 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 2181 348 2529
## 0 0 0 0
## Sum 2181 348 2529
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 86.23962
##
## $`Model sensitivity`
## [1] 1
##
## $`Model specificity`
## [1] 0
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 4.4571429 3.7233636 5.373603
## MET.catmod_PA_level 1.6870488 1.3334127 2.129606
## MET.catHigh_PA_level 0.8974359 0.2219567 5.988737
rocplot(Flogit1)

#Model2 comparing diabetes to physical activity, adjusted for baseline age, and race
GFlogit2<- glm(formula= db_status~MET.cat+Age+race, data = female_NHANES_COMPLETE, family='binomial'(link=logit))
summary(GFlogit2)
##
## Call:
## glm(formula = db_status ~ MET.cat + Age + race, family = binomial(link = logit),
## data = female_NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7658 0.2718 0.3587 0.6576 0.9188
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.99717 0.24781 4.024 5.73e-05 ***
## MET.catmod_PA_level 0.30146 0.12544 2.403 0.0163 *
## MET.catHigh_PA_level 0.07608 0.82952 0.092 0.9269
## Age20-44 1.82284 0.27495 6.630 3.36e-11 ***
## Age45-64 0.12272 0.23123 0.531 0.5956
## Age65-79 -0.24449 0.23804 -1.027 0.3044
## raceOther/Multiracial 0.15667 0.31906 0.491 0.6234
## raceAsian 0.15873 0.19432 0.817 0.4140
## raceBlack -0.10862 0.16306 -0.666 0.5053
## raceWhite 0.68128 0.17137 3.975 7.03e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2026.2 on 2528 degrees of freedom
## Residual deviance: 1835.1 on 2519 degrees of freedom
## AIC: 1855.1
##
## Number of Fisher Scoring iterations: 6
odds.n.ends(GFlogit2)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 191.042 9 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 2181 348 2529
## 0 0 0 0
## Sum 2181 348 2529
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 86.23962
##
## $`Model sensitivity`
## [1] 1
##
## $`Model specificity`
## [1] 0
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 2.7106098 1.6852016 4.461988
## MET.catmod_PA_level 1.3518244 1.0557347 1.726774
## MET.catHigh_PA_level 1.0790502 0.2434876 7.502776
## Age20-44 6.1894148 3.5959868 10.602905
## Age45-64 1.1305687 0.7091528 1.760225
## Age65-79 0.7831011 0.4851937 1.236861
## raceOther/Multiracial 1.1696124 0.6425087 2.263197
## raceAsian 1.1720241 0.8036156 1.723582
## raceBlack 0.8970737 0.6510545 1.234517
## raceWhite 1.9764031 1.4134566 2.769391
rocplot(GFlogit2)

# Model3 comparing diabetes to physical activity,adjusted for baseline age, race, education level, marital status, BMI
Flogit3<- glm(formula = db_status~ MET.cat+Age+race+marital_status+education+BMI.cat, data= female_NHANES_COMPLETE, family='binomial')
summary(Flogit3)
##
## Call:
## glm(formula = db_status ~ MET.cat + Age + race + marital_status +
## education + BMI.cat, family = "binomial", data = female_NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.9346 0.2069 0.3708 0.6049 1.2305
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.31645 1.06986 2.165 0.03037 *
## MET.catmod_PA_level 0.28460 0.12833 2.218 0.02658 *
## MET.catHigh_PA_level 0.19391 0.84261 0.230 0.81800
## Age20-44 1.91863 0.28723 6.680 2.39e-11 ***
## Age45-64 0.30374 0.24170 1.257 0.20887
## Age65-79 -0.07035 0.24587 -0.286 0.77477
## raceOther/Multiracial 0.01321 0.33442 0.040 0.96848
## raceAsian -0.33848 0.21335 -1.587 0.11262
## raceBlack -0.18134 0.18242 -0.994 0.32018
## raceWhite 0.53760 0.18871 2.849 0.00439 **
## marital_statusOther 0.19643 0.13544 1.450 0.14697
## marital_statusUnmarried 0.35230 0.22283 1.581 0.11387
## educationCollege graduate or higher 0.23647 0.19514 1.212 0.22558
## educationLess than 9th grade -0.11557 0.25681 -0.450 0.65269
## BMI.cat>=30 -2.07309 1.02999 -2.013 0.04414 *
## BMI.cat18.5-24.9 -0.70084 1.03852 -0.675 0.49978
## BMI.cat25-29.9 -1.35365 1.03262 -1.311 0.18990
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2026.2 on 2528 degrees of freedom
## Residual deviance: 1759.5 on 2512 degrees of freedom
## AIC: 1793.5
##
## Number of Fisher Scoring iterations: 6
odds.n.ends(Flogit3)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 266.686 16 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 2180 346 2526
## 0 1 2 3
## Sum 2181 348 2529
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 86.27916
##
## $`Model sensitivity`
## [1] 0.9995415
##
## $`Model specificity`
## [1] 0.005747126
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 10.1396494 1.853586456 190.2491391
## MET.catmod_PA_level 1.3292289 1.032208354 1.7075710
## MET.catHigh_PA_level 1.2139816 0.269279394 8.6217458
## Age20-44 6.8116082 3.867170851 11.9606666
## Age45-64 1.3549131 0.833859372 2.1563091
## Age65-79 0.9320663 0.569249294 1.4962249
## raceOther/Multiracial 1.0133022 0.538681097 2.0142405
## raceAsian 0.7128524 0.469932181 1.0859099
## raceBlack 0.8341514 0.582374903 1.1914173
## raceWhite 1.7118920 1.182266758 2.4794255
## marital_statusOther 1.2170446 0.934036668 1.5888741
## marital_statusUnmarried 1.4223319 0.928819873 2.2299874
## educationCollege graduate or higher 1.2667717 0.856064923 1.8423145
## educationLess than 9th grade 0.8908548 0.537395820 1.4732595
## BMI.cat>=30 0.1257958 0.006971679 0.6116315
## BMI.cat18.5-24.9 0.4961698 0.027273580 2.4797133
## BMI.cat25-29.9 0.2582960 0.014279399 1.2666657
rocplot(Flogit3)

#Males subgroup
male_NHANES_COMPLETE<- NHANES_COMPLETE%>%
filter(Gender == "Male")
male_NHANES_COMPLETE <-male_NHANES_COMPLETE%>%
dplyr::select(combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)%>%
mutate(race = recode_factor(race, "Mexican American" = "Hispanic",
"Other Hispanic" = "Hispanic",
"Other Race - Including Multi-Rac" = "Other/Multiracial",
"Non-Hispanic Asian" = "Asian",
"Non-Hispanic Black" = "Black",
"Non-Hispanic White" = "White"))
reorder_size <- function(x) {
factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}
#create Mets score factor
male_NHANES_COMPLETE<-male_NHANES_COMPLETE%>%
mutate(MET.cat = cut(x = combined_MET_Scores_hrs,
breaks = c(-Inf,0, 1000, +Inf),
labels = c("No_physical_activity", "mod_PA_level", "High_PA_level"))
)
summary(male_NHANES_COMPLETE)
## combined_MET_Scores_hrs BMI.cat db_status Age
## Min. : 0.00 <18.5 : 29 Diabetic : 374 >=80 :164
## 1st Qu.: 5.25 >=30 :928 Non_diabetic:1958 20-44:810
## Median : 40.00 18.5-24.9:531 45-64:872
## Mean : 115.62 25-29.9 :844 65-79:486
## 3rd Qu.: 128.33
## Max. :9352.40
## Gender marital_status education
## Female: 0 Married :1298 9-11th grade : 289
## Male :2332 Other : 620 College graduate or higher:1838
## Unmarried: 414 Less than 9th grade : 205
##
##
##
## race MET.cat
## Hispanic :523 No_physical_activity: 495
## Other/Multiracial:133 mod_PA_level :1828
## Asian :319 High_PA_level : 9
## Black :520
## White :837
##
#Males log regression
#Model1
Mlogit1<- glm(formula= db_status~MET.cat, data= male_NHANES_COMPLETE, family='binomial')
summary(Mlogit1)
##
## Call:
## glm(formula = db_status ~ MET.cat, family = "binomial", data = male_NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9828 0.5493 0.5493 0.5493 0.9005
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.1952 0.1064 11.230 < 2e-16 ***
## MET.catmod_PA_level 0.6197 0.1260 4.919 8.69e-07 ***
## MET.catHigh_PA_level -0.5021 0.7151 -0.702 0.483
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2053.5 on 2331 degrees of freedom
## Residual deviance: 2028.9 on 2329 degrees of freedom
## AIC: 2034.9
##
## Number of Fisher Scoring iterations: 4
odds.n.ends(Mlogit1)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 24.595 2 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 1958 374 2332
## 0 0 0 0
## Sum 1958 374 2332
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 83.96226
##
## $`Model sensitivity`
## [1] 1
##
## $`Model specificity`
## [1] 0
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 3.3043478 2.6921112 4.087514
## MET.catmod_PA_level 1.8583470 1.4483247 2.374071
## MET.catHigh_PA_level 0.6052632 0.1570922 2.903094
rocplot(Mlogit1)

#Model2 comparing diabetes to physical activity, adjusted for baseline gender, age, and race
GFlogit2<- glm(formula= db_status~MET.cat+Age+race, data = male_NHANES_COMPLETE, family='binomial'(link=logit))
summary(GFlogit2)
##
## Call:
## glm(formula = db_status ~ MET.cat + Age + race, family = binomial(link = logit),
## data = male_NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8080 0.1979 0.5465 0.7086 1.0621
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.59574 0.23709 2.513 0.011980 *
## MET.catmod_PA_level 0.32369 0.13319 2.430 0.015083 *
## MET.catHigh_PA_level -0.68260 0.75265 -0.907 0.364441
## Age20-44 2.46116 0.28976 8.494 < 2e-16 ***
## Age45-64 0.36424 0.21275 1.712 0.086887 .
## Age65-79 -0.23949 0.21397 -1.119 0.263031
## raceOther/Multiracial -0.03441 0.26963 -0.128 0.898447
## raceAsian -0.03493 0.19757 -0.177 0.859664
## raceBlack 0.33444 0.17529 1.908 0.056397 .
## raceWhite 0.54224 0.16364 3.314 0.000921 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2053.5 on 2331 degrees of freedom
## Residual deviance: 1812.1 on 2322 degrees of freedom
## AIC: 1832.1
##
## Number of Fisher Scoring iterations: 6
odds.n.ends(GFlogit2)
## Waiting for profiling to be done...
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 241.479 9 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 1958 374 2332
## 0 0 0 0
## Sum 1958 374 2332
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 83.96226
##
## $`Model sensitivity`
## [1] 1
##
## $`Model specificity`
## [1] 0
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 1.8143723 1.1475199 2.911414
## MET.catmod_PA_level 1.3822225 1.0622607 1.791120
## MET.catHigh_PA_level 0.5053011 0.1188712 2.548143
## Age20-44 11.7184037 6.6940060 20.937722
## Age45-64 1.4394177 0.9404489 2.169314
## Age65-79 0.7870282 0.5129738 1.188886
## raceOther/Multiracial 0.9661738 0.5759888 1.663552
## raceAsian 0.9656715 0.6571821 1.427336
## raceBlack 1.3971609 0.9915275 1.972639
## raceWhite 1.7198511 1.2471225 2.370095
rocplot(GFlogit2)

# Model3 comparing diabetes to physical activity,adjusted for baseline gender, age, race, education level, marital status, BMI
Mlogit3<- glm(formula = db_status~ MET.cat+Age+race+marital_status+education+BMI.cat, data= male_NHANES_COMPLETE, family='binomial')
summary(Mlogit3)
##
## Call:
## glm(formula = db_status ~ MET.cat + Age + race + marital_status +
## education + BMI.cat, family = "binomial", data = male_NHANES_COMPLETE)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0020 0.1862 0.3613 0.6900 1.3372
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 15.36324 415.13710 0.037 0.97048
## MET.catmod_PA_level 0.25516 0.13671 1.867 0.06197 .
## MET.catHigh_PA_level -1.00689 0.76605 -1.314 0.18872
## Age20-44 2.72426 0.30871 8.825 < 2e-16 ***
## Age45-64 0.57663 0.22213 2.596 0.00944 **
## Age65-79 -0.09800 0.21993 -0.446 0.65588
## raceOther/Multiracial -0.28336 0.28490 -0.995 0.31995
## raceAsian -0.57439 0.22462 -2.557 0.01055 *
## raceBlack 0.04949 0.19904 0.249 0.80363
## raceWhite 0.33614 0.18570 1.810 0.07027 .
## marital_statusOther 0.06275 0.14407 0.436 0.66316
## marital_statusUnmarried -0.22341 0.21604 -1.034 0.30108
## educationCollege graduate or higher 0.13011 0.18804 0.692 0.48898
## educationLess than 9th grade -0.48030 0.25291 -1.899 0.05756 .
## BMI.cat>=30 -15.15300 415.13701 -0.037 0.97088
## BMI.cat18.5-24.9 -14.02730 415.13703 -0.034 0.97304
## BMI.cat25-29.9 -14.53871 415.13701 -0.035 0.97206
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2053.5 on 2331 degrees of freedom
## Residual deviance: 1750.3 on 2315 degrees of freedom
## AIC: 1784.3
##
## Number of Fisher Scoring iterations: 15
odds.n.ends(Mlogit3)
## Waiting for profiling to be done...
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## $`Logistic regression model significance`
## Chi-squared d.f. p
## 303.202 16 <.001
##
## $`Contingency tables (model fit): frequency predicted`
## Number observed
## Number predicted 1 0 Sum
## 1 1948 362 2310
## 0 10 12 22
## Sum 1958 374 2332
##
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 84.04803
##
## $`Model sensitivity`
## [1] 0.9948927
##
## $`Model specificity`
## [1] 0.03208556
##
## $`Predictor odds ratios and 95% CI`
## OR 2.5 % 97.5 %
## (Intercept) 4.700778e+06 5.295678e-03 1.788754e+59
## MET.catmod_PA_level 1.290674e+00 9.850183e-01 1.683959e+00
## MET.catHigh_PA_level 3.653550e-01 8.420537e-02 1.887279e+00
## Age20-44 1.524515e+01 8.398596e+00 2.827640e+01
## Age45-64 1.780032e+00 1.143259e+00 2.735992e+00
## Age65-79 9.066473e-01 5.845095e-01 1.386669e+00
## raceOther/Multiracial 7.532519e-01 4.350536e-01 1.333785e+00
## raceAsian 5.630482e-01 3.623655e-01 8.750819e-01
## raceBlack 1.050737e+00 7.103867e-01 1.551434e+00
## raceWhite 1.399536e+00 9.703748e-01 2.011059e+00
## marital_statusOther 1.064759e+00 8.045320e-01 1.415843e+00
## marital_statusUnmarried 7.997835e-01 5.273159e-01 1.232532e+00
## educationCollege graduate or higher 1.138952e+00 7.814319e-01 1.635335e+00
## educationLess than 9th grade 6.185988e-01 3.756626e-01 1.013903e+00
## BMI.cat>=30 2.625045e-07 3.130560e-59 3.070531e+02
## BMI.cat18.5-24.9 8.091372e-07 5.355139e-59 8.500521e+02
## BMI.cat25-29.9 4.851958e-07 5.141191e-59 5.554286e+02
rocplot(Mlogit3)
