NHANES_2018_PA

library(nhanesA)

## Warning: package 'nhanesA' was built under R version 4.1.2

library(knitr)

## Warning: package 'knitr' was built under R version 4.1.2

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1

## Warning: package 'tidyr' was built under R version 4.1.2

## Warning: package 'readr' was built under R version 4.1.2

## Warning: package 'dplyr' was built under R version 4.1.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(plyr)

## Warning: package 'plyr' was built under R version 4.1.2

## ------------------------------------------------------------------------------

## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)

## ------------------------------------------------------------------------------

## 
## Attaching package: 'plyr'

## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize

## The following object is masked from 'package:purrr':
## 
##     compact

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 4.1.2

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loading required package: survival

## Warning: package 'survival' was built under R version 4.1.2

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(ggplot2)
library(readxl)

## Warning: package 'readxl' was built under R version 4.1.2

library(car)

## Warning: package 'car' was built under R version 4.1.2

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.1.2

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(Rmisc)

## Warning: package 'Rmisc' was built under R version 4.1.2

## Loading required package: lattice

library(ggpubr)

## 
## Attaching package: 'ggpubr'

## The following object is masked from 'package:plyr':
## 
##     mutate

library(caret)

## Warning: package 'caret' was built under R version 4.1.2

## 
## Attaching package: 'caret'

## The following object is masked from 'package:survival':
## 
##     cluster

## The following object is masked from 'package:purrr':
## 
##     lift

library(tableone)

## Warning: package 'tableone' was built under R version 4.1.2

library(odds.n.ends)
library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

library(Deducer)

## Loading required package: JGR

## Warning: package 'JGR' was built under R version 4.1.2

## Loading required package: rJava

## Loading required package: JavaGD

## 
## Please type JGR() to launch console. Platform specific launchers (.exe and .app) can also be obtained at http://www.rforge.net/JGR/files/.

## Loading required package: MASS

## Warning: package 'MASS' was built under R version 4.1.2

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## 
## Note Non-JGR console detected:
##  Deducer is best used from within JGR (http://jgr.markushelbig.org/).
##  To Bring up GUI dialogs, type deducer().

library(broom)

## Warning: package 'broom' was built under R version 4.1.2

#DEMORAPHIC, EXAMINATION, LAB, QUESTIONNAIRE DATA

# Research aim- We aimed to quantify the risk of diabetes and determine whether a relationship is present between physical activity (PA)levels and diabetes.

#Population- USA Non-institutionalized citizens >= 20 years old, Outcome-Diabetes,  exposure- PA, Time- 2017 - 2018

#Covariates- gender, age, race, educational level, BMI, marital status


#DOWNLOAD NHANES DATA DIRECTLY TO R

#Explore the NHANES DATA 2018
kable(head(nhanesTables('DEMO', 2018)))

Data.File.Name	Data.File.Description
DEMO_J	Demographic Variables and Sample Weights

kable(head(nhanesTables('EXAM', 2018)))

Data.File.Name	Data.File.Description
BPX_J	Blood Pressure
BMX_J	Body Measures
OHXDEN_J	Oral Health - Dentition
OHXREF_J	Oral Health - Recommendation of Care
DXXFEM_J	Dual-Energy X-ray Absorptiometry - Femur
DXX_J	Dual-Energy X-ray Absorptiometry - Whole Body

kable(head(nhanesTables('Q', 2018)))

Data.File.Name	Data.File.Description
BPQ_J	Blood Pressure & Cholesterol
HUQ_J	Hospital Utilization & Access to Care
HEQ_J	Hepatitis
CDQ_J	Cardiovascular Health
PAQY_J	Physical Activity - Youth
PAQ_J	Physical Activity

#further explore NHANES 2018
kable(head(nhanesTableVars(data_group = 'DEMO', nh_table = 'DEMO_J', namesonly = FALSE)))

Variable.Name	Variable.Description
AIALANGA	Language of the MEC ACASI Interview Instrument
DMDBORN4	In what country {were you/was SP} born?
DMDCITZN	{Are you/Is SP} a citizen of the United States? [Information about citizenship is being collected by
DMDEDUC2	What is the highest grade or level of school {you have/SP has} completed or the highest degree {you
DMDEDUC3	What is the highest grade or level of school {you have/SP has} completed or the highest degree {you
DMDFMSIZ	Total number of people in the Family

kable(head(nhanesTableVars(data_group = 'EXAM', nh_table = 'BMX_J', namesonly = FALSE)))

Variable.Name	Variable.Description
BMDSTATS	Body Measures Component status Code
BMIARMC	Arm Circumference Comment
BMIARML	Upper Arm Length Comment
BMIHEAD	Head Circumference Comment
BMIHIP	Hip Circumference Comment
BMIHT	Standing Height Comment

kable(head(nhanesTableVars(data_group = 'LAB', nh_table = 'GHB_J', namesonly = FALSE)))

Variable.Name	Variable.Description
LBXGH	Glycohemoglobin (%)
SEQN	Respondent sequence number.

kable(head(nhanesTableVars(data_group = 'Q', nh_table = 'PAQ_J', namesonly = FALSE)))

Variable.Name	Variable.Description
PAD615	How much time {do you/does SP} spend doing vigorous-intensity activities at work on a typical day?
PAD630	How much time {do you/does SP} spend doing moderate-intensity activities at work on a typical day?
PAD645	How much time {do you/does SP} spend walking or bicycling for travel on a typical day?
PAD660	How much time {do you/does SP} spend doing vigorous-intensity sports, fitness or recreational activi
PAD675	How much time {do you/does SP} spend doing moderate-intensity sports, fitness or recreational activi
PAD680	The following question is about sitting at school, at home, getting to and from places, or with frie

#Download data  2017 - 2018 demographic data (id, age, gender, educational level, race)
demo_2018<-nhanes('DEMO_J')
names(demo_2018)

##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM" "DMQMILIZ" "DMQADFC" 
## [13] "DMDBORN4" "DMDCITZN" "DMDYRSUS" "DMDEDUC3" "DMDEDUC2" "DMDMARTL"
## [19] "RIDEXPRG" "SIALANG"  "SIAPROXY" "SIAINTRP" "FIALANG"  "FIAPROXY"
## [25] "FIAINTRP" "MIALANG"  "MIAPROXY" "MIAINTRP" "AIALANGA" "DMDHHSIZ"
## [31] "DMDFMSIZ" "DMDHHSZA" "DMDHHSZB" "DMDHHSZE" "DMDHRGND" "DMDHRAGZ"
## [37] "DMDHREDZ" "DMDHRMAZ" "DMDHSEDZ" "WTINT2YR" "WTMEC2YR" "SDMVPSU" 
## [43] "SDMVSTRA" "INDHHIN2" "INDFMIN2" "INDFMPIR"

demo1<- demo_2018[c("SEQN", "RIAGENDR", "RIDAGEYR", "DMDMARTL", "DMDEDUC2",  "RIDRETH3")]

demo_vars<- names(demo1)
demo_2<-nhanesTranslate('DEMO_J', demo_vars, data = demo1)

## Warning in FUN(X[[i]], ...): No translation table is available for SEQN

## Translated columns: RIAGENDR DMDMARTL DMDEDUC2 RIDRETH3

kable(head(demo_2, n=5))

SEQN	RIAGENDR	RIDAGEYR	DMDMARTL	DMDEDUC2	RIDRETH3
93703	Female	2	NA	NA	Non-Hispanic Asian
93704	Male	2	NA	NA	Non-Hispanic White
93705	Female	66	Divorced	9-11th grade (Includes 12th grad	Non-Hispanic Black
93706	Male	18	NA	NA	Non-Hispanic Asian
93707	Male	13	NA	NA	Other Race - Including Multi-Rac

#Download data diabetes data (id, hba1c)
#labs-hbaic
db_lab_2018<- nhanes('GHB_J')
names(db_lab_2018)

## [1] "SEQN"  "LBXGH"

hbaic_lab<- db_lab_2018[c("SEQN","LBXGH")]

hbaic_lab_vars<- names(hbaic_lab)
hbaic_lab_2<-nhanesTranslate('GHB_J', hbaic_lab_vars, data = hbaic_lab)

## Warning in FUN(X[[i]], ...): No translation table is available for SEQN

## Warning in nhanesTranslate("GHB_J", hbaic_lab_vars, data = hbaic_lab): No
## columns were translated

kable(head(hbaic_lab_2, n=5))

SEQN	LBXGH
93705	6.2
93706	5.2
93707	5.6
93708	6.2
93709	6.3

#Download data PA
db_PA_2018<-nhanes('PAQ_J')
names(db_PA_2018)

##  [1] "SEQN"   "PAQ605" "PAQ610" "PAD615" "PAQ620" "PAQ625" "PAD630" "PAQ635"
##  [9] "PAQ640" "PAD645" "PAQ650" "PAQ655" "PAD660" "PAQ665" "PAQ670" "PAD675"
## [17] "PAD680"

db_PA_qs<- db_PA_2018[c("SEQN","PAD615","PAD630", "PAD645", "PAD660", "PAD675","PAQ610", "PAQ625", "PAQ640", "PAQ655", "PAQ670")]

db_PA_qs_vars<- names(db_PA_qs)
have_PA_qs_2<-nhanesTranslate('PAQ_J', db_PA_qs_vars, data = db_PA_qs)

## Warning in FUN(X[[i]], ...): No translation table is available for SEQN

## Warning in nhanesTranslate("PAQ_J", db_PA_qs_vars, data = db_PA_qs): No columns
## were translated

kable(head(have_PA_qs_2))

SEQN	PAD615	PAD630	PAD645	PAD660	PAD675	PAQ610	PAQ625	PAQ640	PAQ655	PAQ670
93705	NA	NA	NA	NA	60	NA	NA	NA	NA	2
93706	NA	NA	45	NA	30	NA	NA	5	NA	2
93708	NA	NA	NA	NA	30	NA	NA	NA	NA	5
93709	NA	180	NA	NA	NA	NA	2	NA	NA	NA
93711	NA	NA	60	60	30	NA	NA	5	4	2
93712	420	180	NA	45	90	6	6	NA	4	3

# multiply the METs by the CDC recommended factors
have_PA_qs_2$PAQ610_score <- have_PA_qs_2$PAQ610 *8
have_PA_qs_2$PAQ625_score <- have_PA_qs_2$PAQ625 *4
have_PA_qs_2$PAQ640_score <- have_PA_qs_2$PAQ640 *4
have_PA_qs_2$PAQ655_score <- have_PA_qs_2$PAQ655 *8
have_PA_qs_2$PAQ670_score <- have_PA_qs_2$PAQ670 *8


#PA duration in minute per day
have_PA_qs_2$minutes<-rowSums(cbind(have_PA_qs_2$PAQ610_score , have_PA_qs_2$PAQ625_score ,have_PA_qs_2$PAQ640_score , have_PA_qs_2$PAQ655_score , have_PA_qs_2$PAQ670_score), na.rm=TRUE)
#PA duration in days per week
have_PA_qs_2$days<-rowSums(cbind(have_PA_qs_2$PAD615 , have_PA_qs_2$PAD630 ,have_PA_qs_2$PAD645 , have_PA_qs_2$PAD660 , have_PA_qs_2$PAD675), na.rm=TRUE)

have_PA_qs_2<-have_PA_qs_2 %>%
  mutate(met_1 = PAQ610_score * PAD615,
         met_2 = PAQ625_score * PAD630,
         met_3 = PAQ640_score * PAD645,
         met_4 = PAQ655_score * PAD660,
         met_5 = PAQ670_score * PAD675)
#cbind- merge PA variables importanat for our analysis
have_PA_qs_2$combined_MET_Scores<- rowSums(cbind(have_PA_qs_2$met_1 , have_PA_qs_2$met_2 ,have_PA_qs_2$met_3 , have_PA_qs_2$met_4 , have_PA_qs_2$met_5), na.rm=TRUE)
have_PA_qs_2$combined_MET_Scores_hrs <- have_PA_qs_2$combined_MET_Scores /60

head(have_PA_qs_2)

##    SEQN PAD615 PAD630 PAD645 PAD660 PAD675 PAQ610 PAQ625 PAQ640 PAQ655 PAQ670
## 1 93705     NA     NA     NA     NA     60     NA     NA     NA     NA      2
## 2 93706     NA     NA     45     NA     30     NA     NA      5     NA      2
## 3 93708     NA     NA     NA     NA     30     NA     NA     NA     NA      5
## 4 93709     NA    180     NA     NA     NA     NA      2     NA     NA     NA
## 5 93711     NA     NA     60     60     30     NA     NA      5      4      2
## 6 93712    420    180     NA     45     90      6      6     NA      4      3
##   PAQ610_score PAQ625_score PAQ640_score PAQ655_score PAQ670_score minutes days
## 1           NA           NA           NA           NA           16      16   60
## 2           NA           NA           20           NA           16      36   75
## 3           NA           NA           NA           NA           40      40   30
## 4           NA            8           NA           NA           NA       8  180
## 5           NA           NA           20           32           16      68  150
## 6           48           24           NA           32           24     128  735
##   met_1 met_2 met_3 met_4 met_5 combined_MET_Scores combined_MET_Scores_hrs
## 1    NA    NA    NA    NA   960                 960                      16
## 2    NA    NA   900    NA   480                1380                      23
## 3    NA    NA    NA    NA  1200                1200                      20
## 4    NA  1440    NA    NA    NA                1440                      24
## 5    NA    NA  1200  1920   480                3600                      60
## 6 20160  4320    NA  1440  2160               28080                     468

PA_Level_MET_Score <- have_PA_qs_2 %>%
  dplyr::select("SEQN", "combined_MET_Scores_hrs")
kable(head(PA_Level_MET_Score))

SEQN	combined_MET_Scores_hrs
93705	16
93706	23
93708	20
93709	24
93711	60
93712	468

#Download data BMI
db_BMI_2018<-nhanes('BMX_J')
db_BMI_vars<-names(db_BMI_2018)
db_BMI_2018_qs<- db_BMI_2018[c("SEQN","BMXHT","BMXWT")]
db_BMI_2018_2<-nhanesTranslate('BMX_J', db_BMI_vars, data = db_BMI_2018_qs)

## Warning in FUN(X[[i]], ...): No translation table is available for SEQN

## Warning in nhanesTranslate("BMX_J", db_BMI_vars, data = db_BMI_2018_qs): No
## columns were translated

kable(head(db_BMI_2018_2))

SEQN	BMXHT	BMXWT
93703	88.6	13.7
93704	94.2	13.9
93705	158.3	79.5
93706	175.7	66.3
93707	158.4	45.4
93708	150.2	53.5

#merge all datasets

stat_project_analytic_data<- join_all(list(demo_2, hbaic_lab_2, PA_Level_MET_Score, db_BMI_2018_2), by = "SEQN", type = "full")

kable(head(stat_project_analytic_data))

SEQN	RIAGENDR	RIDAGEYR	DMDMARTL	DMDEDUC2	RIDRETH3	LBXGH	combined_MET_Scores_hrs	BMXHT	BMXWT
93703	Female	2	NA	NA	Non-Hispanic Asian	NA	NA	88.6	13.7
93704	Male	2	NA	NA	Non-Hispanic White	NA	NA	94.2	13.9
93705	Female	66	Divorced	9-11th grade (Includes 12th grad	Non-Hispanic Black	6.2	16	158.3	79.5
93706	Male	18	NA	NA	Non-Hispanic Asian	5.2	23	175.7	66.3
93707	Male	13	NA	NA	Other Race - Including Multi-Rac	5.6	NA	158.4	45.4
93708	Female	66	Married	Less than 9th grade	Non-Hispanic Asian	6.2	20	150.2	53.5

dim(stat_project_analytic_data)

## [1] 9254   10

#Data cleaning

#filter age 20-80
stat_project_analytic_data_cleaned <-stat_project_analytic_data %>%
  dplyr::filter(RIDAGEYR >= 20 & RIDAGEYR <= 80)




#mutate to rename variables and create BMI variable

stat_project_analytic_data_cleaned <-stat_project_analytic_data %>%
  dplyr::filter(RIDAGEYR >= 20 & RIDAGEYR<= 80)%>%
  mutate(DMDMARTL = recode_factor(DMDMARTL, "Married" = "Married",
                                  "Widowed" = "Other",
                                  "Refused" = "NA",
                                  "Divorced" = "Other",
                                  "Separated"= "Other",
                                  "Living with partner" = "Other",
                                  "Never married" = "Unmarried"),
         DMDEDUC2 = recode_factor(DMDEDUC2, "Don't Know" = "NA",
                                  "Refused" = "NA",
                                  "9-11th grade (Includes 12th grad" = "9-11th grade ",
                                  "High school graduate/GED or equi" = "College graduate or higher",
                                  "Some college or AA degree"= "College graduate or higher",
                                  "College graduate or above" = "College graduate or higher"),
         
         BMXHT = BMXHT/100,
         BMI = BMXWT/BMXHT^2,
         age.cat = cut(x = RIDAGEYR,
                       breaks = c(20, 44, 64, 79, Inf),
                       labels = c("20-44", "45-64", "65-79", ">=80")),
         BMI.cat = cut(x = BMI,
                       breaks = c(0, 18.5, 25, 30, +Inf),
                       labels = c("<18.5", "18.5-24.9", "25-29.9", ">=30"))
         
  )

#Create hbaic category a
stat_project_analytic_data_cleaned$LBXGH<- ifelse(stat_project_analytic_data_cleaned$LBXGH < 6.5, 0, 1)
stat_project_analytic_data_cleaned$LBXGH<- as.factor(stat_project_analytic_data_cleaned$LBXGH)

#create diabetes status category
stat_project_analytic_data_cleaned$db_status[stat_project_analytic_data_cleaned$LBXGH == "0"]<- "Non_diabetic"
stat_project_analytic_data_cleaned$db_status[stat_project_analytic_data_cleaned$LBXGH == "1"]<- "Diabetic"
stat_project_analytic_data_cleaned$db_status<-as.factor(stat_project_analytic_data_cleaned$db_status)

# EDA
table(stat_project_analytic_data_cleaned$RIAGENDR)

## 
##   Male Female 
##   2702   2867

table(stat_project_analytic_data_cleaned$age.cat)

## 
## 20-44 45-64 65-79  >=80 
##  2019  1974  1073   427

table(stat_project_analytic_data_cleaned$DMDMARTL)

## 
##   Married     Other        NA Unmarried 
##      2737      1820         6      1006

table(stat_project_analytic_data_cleaned$DMDEDUC2)

## 
##                         NA              9-11th grade  
##                         13                        638 
## College graduate or higher        Less than 9th grade 
##                       4439                        479

table(stat_project_analytic_data_cleaned$LBXGH)

## 
##    0    1 
## 4273  745

table(stat_project_analytic_data_cleaned$BMI.cat)

## 
##     <18.5 18.5-24.9   25-29.9      >=30 
##        83      1264      1666      2162

table(stat_project_analytic_data_cleaned$RIDRETH3)

## 
##                 Mexican American                   Other Hispanic 
##                              735                              517 
##               Non-Hispanic White               Non-Hispanic Black 
##                             1935                             1298 
##               Non-Hispanic Asian Other Race - Including Multi-Rac 
##                              811                              273

summary(stat_project_analytic_data_cleaned)

##       SEQN          RIAGENDR       RIDAGEYR         DMDMARTL   
##  Min.   : 93705   Male  :2702   Min.   :20.0   Married  :2737  
##  1st Qu.: 95969   Female:2867   1st Qu.:36.0   Other    :1820  
##  Median : 98274                 Median :53.0   NA       :   6  
##  Mean   : 98288                 Mean   :51.5   Unmarried:1006  
##  3rd Qu.:100621                 3rd Qu.:66.0                   
##  Max.   :102956                 Max.   :80.0                   
##                                                                
##                        DMDEDUC2                                RIDRETH3   
##  NA                        :  13   Mexican American                : 735  
##  9-11th grade              : 638   Other Hispanic                  : 517  
##  College graduate or higher:4439   Non-Hispanic White              :1935  
##  Less than 9th grade       : 479   Non-Hispanic Black              :1298  
##                                    Non-Hispanic Asian              : 811  
##                                    Other Race - Including Multi-Rac: 273  
##                                                                           
##   LBXGH      combined_MET_Scores_hrs     BMXHT           BMXWT       
##  0   :4273   Min.   :   0.00         Min.   :1.383   Min.   : 32.60  
##  1   : 745   1st Qu.:   0.00         1st Qu.:1.588   1st Qu.: 67.00  
##  NA's: 551   Median :  24.00         Median :1.658   Median : 79.10  
##              Mean   :  92.90         Mean   :1.663   Mean   : 82.84  
##              3rd Qu.:  89.33         3rd Qu.:1.736   3rd Qu.: 94.80  
##              Max.   :9352.40         Max.   :1.977   Max.   :242.60  
##                                      NA's   :384     NA's   :384     
##       BMI         age.cat          BMI.cat            db_status   
##  Min.   :14.20   20-44:2019   <18.5    :  83   Diabetic    : 745  
##  1st Qu.:24.79   45-64:1974   18.5-24.9:1264   Non_diabetic:4273  
##  Median :28.64   65-79:1073   25-29.9  :1666   NA's        : 551  
##  Mean   :29.85   >=80 : 427   >=30     :2162                      
##  3rd Qu.:33.56   NA's :  76   NA's     : 394                      
##  Max.   :86.16                                                    
##  NA's   :394

#EDA
par(mfrow = c(2, 3))
hist(stat_project_analytic_data_cleaned$RIDAGEYR)#AGE
hist(stat_project_analytic_data_cleaned$BMI)#BMI
plot(stat_project_analytic_data_cleaned$RIDRETH3)#RACE
plot(stat_project_analytic_data_cleaned$RIAGENDR)#Gender
plot(stat_project_analytic_data_cleaned$LBXGH)#DM status

#select diabetes status, PA levels, and covariates that have been shown to be associated with increased prevalence of diabetes (age, gender, race, bmi).We had pre-determined covariates, so we did not perform correlation analysis.
NHANES_DB_PA_DATA_ANALYSIS<- stat_project_analytic_data_cleaned%>%
  dplyr::select(SEQN, RIAGENDR, age.cat, DMDMARTL, DMDEDUC2, RIDRETH3, combined_MET_Scores_hrs, BMI.cat, db_status)

kable(head(NHANES_DB_PA_DATA_ANALYSIS))

SEQN	RIAGENDR	age.cat	DMDMARTL	DMDEDUC2	RIDRETH3	combined_MET_Scores_hrs	BMI.cat	db_status
93705	Female	65-79	Other	9-11th grade	Non-Hispanic Black	16	>=30	Non_diabetic
93708	Female	65-79	Married	Less than 9th grade	Non-Hispanic Asian	20	18.5-24.9	Non_diabetic
93709	Female	65-79	Other	College graduate or higher	Non-Hispanic Black	24	>=30	Non_diabetic
93711	Male	45-64	Married	College graduate or higher	Non-Hispanic Asian	60	18.5-24.9	Non_diabetic
93713	Male	65-79	Other	College graduate or higher	Non-Hispanic White	30	18.5-24.9	Non_diabetic
93714	Female	45-64	Married	College graduate or higher	Non-Hispanic Black	0	>=30	Diabetic

#RENAME VARIABLES
NHANES_DB_PA_DATA_ANALYSIS$Age<- NHANES_DB_PA_DATA_ANALYSIS$age.cat
NHANES_DB_PA_DATA_ANALYSIS$marital_status<- NHANES_DB_PA_DATA_ANALYSIS$DMDMARTL
NHANES_DB_PA_DATA_ANALYSIS$education<- NHANES_DB_PA_DATA_ANALYSIS$DMDEDUC2
NHANES_DB_PA_DATA_ANALYSIS$Gender<-NHANES_DB_PA_DATA_ANALYSIS$RIAGENDR
NHANES_DB_PA_DATA_ANALYSIS$race <- NHANES_DB_PA_DATA_ANALYSIS$RIDRETH3


NHANES_DB_PA_DATA_ANALYSIS_2<- NHANES_DB_PA_DATA_ANALYSIS%>%
  dplyr::select(SEQN,combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)
kable(head(NHANES_DB_PA_DATA_ANALYSIS_2))

SEQN	combined_MET_Scores_hrs	BMI.cat	db_status	Age	Gender	marital_status	education	race
93705	16	>=30	Non_diabetic	65-79	Female	Other	9-11th grade	Non-Hispanic Black
93708	20	18.5-24.9	Non_diabetic	65-79	Female	Married	Less than 9th grade	Non-Hispanic Asian
93709	24	>=30	Non_diabetic	65-79	Female	Other	College graduate or higher	Non-Hispanic Black
93711	60	18.5-24.9	Non_diabetic	45-64	Male	Married	College graduate or higher	Non-Hispanic Asian
93713	30	18.5-24.9	Non_diabetic	65-79	Male	Other	College graduate or higher	Non-Hispanic White
93714	0	>=30	Diabetic	45-64	Female	Married	College graduate or higher	Non-Hispanic Black

#Remove NA's
complete_NHANES_DB_PA_DATA_ANALYSIS_3<-na.omit(NHANES_DB_PA_DATA_ANALYSIS_2)
#subset
#remove rows where column matital status & education is equal to "NA"
complete_NHANES_DB_PA_DATA_ANALYSIS_3 <- subset(complete_NHANES_DB_PA_DATA_ANALYSIS_3, complete_NHANES_DB_PA_DATA_ANALYSIS_3$education != "NA")
complete_NHANES_DB_PA_DATA_ANALYSIS_3 <- subset(complete_NHANES_DB_PA_DATA_ANALYSIS_3, complete_NHANES_DB_PA_DATA_ANALYSIS_3$marital_status != "NA")

summary(complete_NHANES_DB_PA_DATA_ANALYSIS_3)

##       SEQN        combined_MET_Scores_hrs      BMI.cat            db_status   
##  Min.   : 93705   Min.   :   0.00         <18.5    :  69   Diabetic    : 722  
##  1st Qu.: 95945   1st Qu.:   0.00         18.5-24.9:1166   Non_diabetic:4139  
##  Median : 98257   Median :  26.00         25-29.9  :1571                      
##  Mean   : 98275   Mean   :  93.14         >=30     :2055                      
##  3rd Qu.:100590   3rd Qu.:  90.00                                             
##  Max.   :102956   Max.   :9352.40                                             
##     Age          Gender       marital_status                      education   
##  20-44:1771   Male  :2332   Married  :2483   NA                        :   0  
##  45-64:1808   Female:2529   Other    :1560   9-11th grade              : 547  
##  65-79: 946                 NA       :   0   College graduate or higher:3900  
##  >=80 : 336                 Unmarried: 818   Less than 9th grade       : 414  
##                                                                               
##                                                                               
##                                race     
##  Mexican American                : 651  
##  Other Hispanic                  : 460  
##  Non-Hispanic White              :1700  
##  Non-Hispanic Black              :1113  
##  Non-Hispanic Asian              : 693  
##  Other Race - Including Multi-Rac: 244

dim(complete_NHANES_DB_PA_DATA_ANALYSIS_3)

## [1] 4861    9

kable(head(complete_NHANES_DB_PA_DATA_ANALYSIS_3))

SEQN	combined_MET_Scores_hrs	BMI.cat	db_status	Age	Gender	marital_status	education	race
93705	16	>=30	Non_diabetic	65-79	Female	Other	9-11th grade	Non-Hispanic Black
93708	20	18.5-24.9	Non_diabetic	65-79	Female	Married	Less than 9th grade	Non-Hispanic Asian
93709	24	>=30	Non_diabetic	65-79	Female	Other	College graduate or higher	Non-Hispanic Black
93711	60	18.5-24.9	Non_diabetic	45-64	Male	Married	College graduate or higher	Non-Hispanic Asian
93713	30	18.5-24.9	Non_diabetic	65-79	Male	Other	College graduate or higher	Non-Hispanic White
93714	0	>=30	Diabetic	45-64	Female	Married	College graduate or higher	Non-Hispanic Black

#write data as a table
write.table(complete_NHANES_DB_PA_DATA_ANALYSIS_3, file = 'complete_NHANES_DB_PA_DATA_ANALYSIS_33.txt', quote = FALSE,sep='\t', col.names = NA, row.names = TRUE)
#Load cleaned data
NHANES_COMPLETE<- read_excel("complete_NHANES_DB_PA_DATA_ANALYSIS_33.xlsx")
typeof(NHANES_COMPLETE)

## [1] "list"

NHANES_COMPLETE<- as.data.frame(NHANES_COMPLETE)
#EDA
summary(NHANES_COMPLETE)

##       SEQN        combined_MET_Scores_hrs   BMI.cat           db_status        
##  Min.   : 93705   Min.   :   0.00         Length:4861        Length:4861       
##  1st Qu.: 95945   1st Qu.:   0.00         Class :character   Class :character  
##  Median : 98257   Median :  26.00         Mode  :character   Mode  :character  
##  Mean   : 98275   Mean   :  93.14                                              
##  3rd Qu.:100590   3rd Qu.:  90.00                                              
##  Max.   :102956   Max.   :9352.40                                              
##      Age               Gender          marital_status      education        
##  Length:4861        Length:4861        Length:4861        Length:4861       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      race          
##  Length:4861       
##  Class :character  
##  Mode  :character  
##                    
##                    
##

str(NHANES_COMPLETE)

## 'data.frame':    4861 obs. of  9 variables:
##  $ SEQN                   : num  93705 93708 93709 93711 93713 ...
##  $ combined_MET_Scores_hrs: num  16 20 24 60 30 ...
##  $ BMI.cat                : chr  ">=30" "18.5-24.9" ">=30" "18.5-24.9" ...
##  $ db_status              : chr  "Non_diabetic" "Non_diabetic" "Non_diabetic" "Non_diabetic" ...
##  $ Age                    : chr  "65-79" "65-79" "65-79" "45-64" ...
##  $ Gender                 : chr  "Female" "Female" "Female" "Male" ...
##  $ marital_status         : chr  "Other" "Married" "Other" "Married" ...
##  $ education              : chr  "9-11th grade" "Less than 9th grade" "College graduate or higher" "College graduate or higher" ...
##  $ race                   : chr  "Non-Hispanic Black" "Non-Hispanic Asian" "Non-Hispanic Black" "Non-Hispanic Asian" ...

#convert to  variables to factor
NHANES_COMPLETE$BMI.cat <- as.factor(NHANES_COMPLETE$BMI.cat)
NHANES_COMPLETE$db_status <- as.factor(NHANES_COMPLETE$db_status)
NHANES_COMPLETE$Age <- as.factor(NHANES_COMPLETE$Age)
NHANES_COMPLETE$Gender <- as.factor(NHANES_COMPLETE$Gender)
NHANES_COMPLETE$marital_status <- as.factor(NHANES_COMPLETE$marital_status)
NHANES_COMPLETE$race <- as.factor(NHANES_COMPLETE$race)
NHANES_COMPLETE$education <- as.factor(NHANES_COMPLETE$education)

str(NHANES_COMPLETE)

## 'data.frame':    4861 obs. of  9 variables:
##  $ SEQN                   : num  93705 93708 93709 93711 93713 ...
##  $ combined_MET_Scores_hrs: num  16 20 24 60 30 ...
##  $ BMI.cat                : Factor w/ 4 levels "<18.5",">=30",..: 2 3 2 3 3 2 3 2 3 3 ...
##  $ db_status              : Factor w/ 2 levels "Diabetic","Non_diabetic": 2 2 2 2 2 1 2 2 2 2 ...
##  $ Age                    : Factor w/ 4 levels ">=80","20-44",..: 4 4 4 3 4 3 4 3 2 3 ...
##  $ Gender                 : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 2 ...
##  $ marital_status         : Factor w/ 3 levels "Married","Other",..: 2 1 2 1 2 1 1 1 3 3 ...
##  $ education              : Factor w/ 3 levels "9-11th grade",..: 1 3 2 2 2 2 2 2 2 2 ...
##  $ race                   : Factor w/ 6 levels "Mexican American",..: 3 2 3 2 4 3 6 2 4 3 ...

#rename variables again
NHANES_COMPLETE <-NHANES_COMPLETE%>%
  dplyr::select(combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)%>%
  mutate(race = recode_factor(race, "Mexican American" = "Hispanic",
                              "Other Hispanic" = "Hispanic",
                              "Other Race - Including Multi-Rac" = "Other/Multiracial",
                              "Non-Hispanic Asian" = "Asian",
                              "Non-Hispanic Black" = "Black",
                              "Non-Hispanic White" = "White"))

reorder_size <- function(x) {
  factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}

#create Mets score factor
NHANES_COMPLETE<-NHANES_COMPLETE%>%
  mutate(MET.cat = cut(x = combined_MET_Scores_hrs,
                       breaks = c(-Inf,0, 1000,  +Inf),
                       labels = c("No_physical_activity", "mod_PA_level", "High_PA_level"))
  )

summary(NHANES_COMPLETE)

##  combined_MET_Scores_hrs      BMI.cat            db_status       Age      
##  Min.   :   0.00         <18.5    :  69   Diabetic    : 722   >=80 : 336  
##  1st Qu.:   0.00         >=30     :2055   Non_diabetic:4139   20-44:1771  
##  Median :  26.00         18.5-24.9:1166                       45-64:1808  
##  Mean   :  93.14         25-29.9  :1571                       65-79: 946  
##  3rd Qu.:  90.00                                                          
##  Max.   :9352.40                                                          
##     Gender       marital_status                      education   
##  Female:2529   Married  :2483   9-11th grade              : 547  
##  Male  :2332   Other    :1560   College graduate or higher:3900  
##                Unmarried: 818   Less than 9th grade       : 414  
##                                                                  
##                                                                  
##                                                                  
##                 race                      MET.cat    
##  Hispanic         :1111   No_physical_activity:1259  
##  Other/Multiracial: 244   mod_PA_level        :3583  
##  Asian            : 693   High_PA_level       :  19  
##  Black            :1113                              
##  White            :1700                              
##

# More EDA
ggplot(NHANES_COMPLETE, aes(x = reorder_size(BMI.cat)))+
  geom_bar(color = "blue", fill="blue")+
  facet_grid(~ db_status)+
  theme_minimal()+
  xlab("BMI Category")+
  scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(Age)))+
  geom_bar(color = "blue", fill="blue")+
  facet_grid(~ db_status)+
  theme_minimal()+
  xlab("Age Category")+
  scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(db_status)))+
geom_bar(color = "blue", fill="blue")+
theme_minimal()+
xlab("DM Status")+
scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(MET.cat)))+
  geom_bar(color = "blue", fill="blue")+
  facet_grid(~ db_status)+
  theme_minimal()+
  xlab("METscore Category")+
  scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(marital_status))+
  geom_bar(color = "blue", fill="blue")+
  facet_grid(~ db_status)+
  theme_minimal()+
  xlab("Marital status")+
  scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(race)))+
  geom_bar(color = "blue", fill="blue")+
  facet_grid(~ db_status)+
  theme_minimal()+
  xlab("Race")+
  scale_x_discrete( guide = guide_axis(angle = 70))

ggplot(NHANES_COMPLETE, aes(x = reorder_size(Gender)))+
  geom_bar(color = "blue", fill="blue")+
  facet_grid(~ db_status)+
  theme_minimal()+
  xlab("Gender")+
  scale_x_discrete( guide = guide_axis(angle = 70))

#normality assumption
##shapiro-wilks and qqplots

#shapiro
shapiro.test(NHANES_COMPLETE$combined_MET_Scores_hrs)

## 
##  Shapiro-Wilk normality test
## 
## data:  NHANES_COMPLETE$combined_MET_Scores_hrs
## W = 0.19001, p-value < 2.2e-16

#qqplots
ggqqplot(NHANES_COMPLETE$combined_MET_Scores_hrs)

#other variables are categorical so normality testing was not performed. the METs scores were not normally distributed as seen by the shapiro-wilks test (p , 0.05) and qq-plots results

#Ho= there is no significant association between PA level and diabetes status
#Ha= there is a significant association between PA level and diabetes status

# chi-square for categorical variables
chisq.test(NHANES_COMPLETE$BMI.cat, NHANES_COMPLETE$db_status)

## 
##  Pearson's Chi-squared test
## 
## data:  NHANES_COMPLETE$BMI.cat and NHANES_COMPLETE$db_status
## X-squared = 99.689, df = 3, p-value < 2.2e-16

chisq.test(NHANES_COMPLETE$Age, NHANES_COMPLETE$db_status)

## 
##  Pearson's Chi-squared test
## 
## data:  NHANES_COMPLETE$Age and NHANES_COMPLETE$db_status
## X-squared = 317.1, df = 3, p-value < 2.2e-16

chisq.test(NHANES_COMPLETE$Gender, NHANES_COMPLETE$db_status)

## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  NHANES_COMPLETE$Gender and NHANES_COMPLETE$db_status
## X-squared = 4.797, df = 1, p-value = 0.02851

chisq.test(NHANES_COMPLETE$marital_status, NHANES_COMPLETE$db_status)

## 
##  Pearson's Chi-squared test
## 
## data:  NHANES_COMPLETE$marital_status and NHANES_COMPLETE$db_status
## X-squared = 33.638, df = 2, p-value = 4.96e-08

chisq.test(NHANES_COMPLETE$education, NHANES_COMPLETE$db_status)

## 
##  Pearson's Chi-squared test
## 
## data:  NHANES_COMPLETE$education and NHANES_COMPLETE$db_status
## X-squared = 42.801, df = 2, p-value = 5.079e-10

chisq.test(NHANES_COMPLETE$race, NHANES_COMPLETE$db_status)

## 
##  Pearson's Chi-squared test
## 
## data:  NHANES_COMPLETE$race and NHANES_COMPLETE$db_status
## X-squared = 17.518, df = 4, p-value = 0.001533

chisq.test(NHANES_COMPLETE$MET.cat, NHANES_COMPLETE$db_status)

## Warning in chisq.test(NHANES_COMPLETE$MET.cat, NHANES_COMPLETE$db_status): Chi-
## squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  NHANES_COMPLETE$MET.cat and NHANES_COMPLETE$db_status
## X-squared = 41.886, df = 2, p-value = 8.028e-10

#T-test
t.test(  NHANES_COMPLETE$combined_MET_Scores_hrs ~  NHANES_COMPLETE$db_status)

## 
##  Welch Two Sample t-test
## 
## data:  NHANES_COMPLETE$combined_MET_Scores_hrs by NHANES_COMPLETE$db_status
## t = -2.384, df = 1108.3, p-value = 0.01729
## alternative hypothesis: true difference in means between group Diabetic and group Non_diabetic is not equal to 0
## 95 percent confidence interval:
##  -53.221530  -5.166547
## sample estimates:
##     mean in group Diabetic mean in group Non_diabetic 
##                   68.27802                   97.47206

#create table one. Categorical Chisquare, continous t-test
CreateTableOne(data= NHANES_COMPLETE, strata = "db_status", vars = c("Age", "Gender", "BMI.cat","marital_status", "education", "race", "MET.cat", "combined_MET_Scores_hrs" ))

##                                      Stratified by db_status
##                                       Diabetic       Non_diabetic   p      test
##   n                                     722           4139                     
##   Age (%)                                                           <0.001     
##      >=80                                69 ( 9.6)     267 ( 6.5)              
##      20-44                               60 ( 8.3)    1711 (41.3)              
##      45-64                              343 (47.5)    1465 (35.4)              
##      65-79                              250 (34.6)     696 (16.8)              
##   Gender = Male (%)                     374 (51.8)    1958 (47.3)    0.029     
##   BMI.cat (%)                                                       <0.001     
##      <18.5                                1 ( 0.1)      68 ( 1.6)              
##      >=30                               411 (56.9)    1644 (39.7)              
##      18.5-24.9                           91 (12.6)    1075 (26.0)              
##      25-29.9                            219 (30.3)    1352 (32.7)              
##   marital_status (%)                                                <0.001     
##      Married                            415 (57.5)    2068 (50.0)              
##      Other                              238 (33.0)    1322 (31.9)              
##      Unmarried                           69 ( 9.6)     749 (18.1)              
##   education (%)                                                     <0.001     
##      9-11th grade                        90 (12.5)     457 (11.0)              
##      College graduate or higher         527 (73.0)    3373 (81.5)              
##      Less than 9th grade                105 (14.5)     309 ( 7.5)              
##   race (%)                                                           0.002     
##      Hispanic                           185 (25.6)     926 (22.4)              
##      Other/Multiracial                   38 ( 5.3)     206 ( 5.0)              
##      Asian                              108 (15.0)     585 (14.1)              
##      Black                              187 (25.9)     926 (22.4)              
##      White                              204 (28.3)    1496 (36.1)              
##   MET.cat (%)                                                       <0.001     
##      No_physical_activity               255 (35.3)    1004 (24.3)              
##      mod_PA_level                       462 (64.0)    3121 (75.4)              
##      High_PA_level                        5 ( 0.7)      14 ( 0.3)              
##   combined_MET_Scores_hrs (mean (SD)) 68.28 (294.74) 97.47 (350.26)  0.035

#Model1- Model I only comparing diabetes to physical activity
logit1<- glm(formula= db_status~MET.cat, data= NHANES_COMPLETE, family='binomial')
summary(logit1)

## 
## Call:
## glm(formula = db_status ~ MET.cat, family = "binomial", data = NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0240   0.5254   0.5254   0.5254   0.7815  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.37048    0.07013  19.543  < 2e-16 ***
## MET.catmod_PA_level   0.53986    0.08604   6.275  3.5e-10 ***
## MET.catHigh_PA_level -0.34086    0.52569  -0.648    0.517    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4084.7  on 4860  degrees of freedom
## Residual deviance: 4045.1  on 4858  degrees of freedom
## AIC: 4051.1
## 
## Number of Fisher Scoring iterations: 4

odds.n.ends(logit1)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##      39.552           2       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   4139  722 4861
##              0      0    0    0
##              Sum 4139  722 4861
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 85.14709
## 
## $`Model sensitivity`
## [1] 1
## 
## $`Model specificity`
## [1] 0
## 
## $`Predictor odds ratios and 95% CI`
##                             OR     2.5 %   97.5 %
## (Intercept)          3.9372549 3.4378852 4.526118
## MET.catmod_PA_level  1.7157668 1.4481933 2.029324
## MET.catHigh_PA_level 0.7111554 0.2693302 2.217103

rocplot(logit1)

#Model2 comparing diabetes to physical activity, adjusted for baseline gender, age, and race
logit2<- glm(formula= db_status~MET.cat+Gender+Age+race, data = NHANES_COMPLETE, family='binomial'(link=logit))
summary(logit2)

## 
## Call:
## glm(formula = db_status ~ MET.cat + Gender + Age + race, family = binomial(link = logit), 
##     data = NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8015   0.2330   0.3644   0.6743   1.0291  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            0.87961    0.17378   5.062 4.16e-07 ***
## MET.catmod_PA_level    0.31111    0.09122   3.410 0.000649 ***
## MET.catHigh_PA_level  -0.38428    0.54959  -0.699 0.484416    
## GenderMale            -0.17532    0.08475  -2.069 0.038578 *  
## Age20-44               2.11473    0.19629  10.773  < 2e-16 ***
## Age45-64               0.25065    0.15583   1.608 0.107731    
## Age65-79              -0.23532    0.15840  -1.486 0.137392    
## raceOther/Multiracial  0.03573    0.20414   0.175 0.861065    
## raceAsian              0.06845    0.13817   0.495 0.620325    
## raceBlack              0.09933    0.11913   0.834 0.404395    
## raceWhite              0.59870    0.11773   5.085 3.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4084.7  on 4860  degrees of freedom
## Residual deviance: 3661.5  on 4850  degrees of freedom
## AIC: 3683.5
## 
## Number of Fisher Scoring iterations: 6

odds.n.ends(logit2)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##     423.161          10       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   4139  722 4861
##              0      0    0    0
##              Sum 4139  722 4861
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 85.14709
## 
## $`Model sensitivity`
## [1] 1
## 
## $`Model specificity`
## [1] 0
## 
## $`Predictor odds ratios and 95% CI`
##                              OR     2.5 %     97.5 %
## (Intercept)           2.4099712 1.7216053  3.4045349
## MET.catmod_PA_level   1.3649350 1.1404844  1.6309366
## MET.catHigh_PA_level  0.6809392 0.2431054  2.1986726
## GenderMale            0.8391871 0.7106172  0.9907398
## Age20-44              8.2873875 5.6457383 12.2025714
## Age45-64              1.2848635 0.9418372  1.7362182
## Age65-79              0.7903165 0.5765364  1.0735610
## raceOther/Multiracial 1.0363745 0.7006954  1.5626749
## raceAsian             1.0708435 0.8180847  1.4066951
## raceBlack             1.1044275 0.8743948  1.3951350
## raceWhite             1.8197491 1.4447067  2.2925308

rocplot(logit2)

# Model3 comparing diabetes to physical activity,adjusted for baseline gender, age, race, education level, marital status, BMI
logit3<- glm(formula = db_status~ MET.cat+Gender+Age+race+marital_status+education+BMI.cat,  data= NHANES_COMPLETE, family='binomial')
summary(logit3)

## 
## Call:
## glm(formula = db_status ~ MET.cat + Gender + Age + race + marital_status + 
##     education + BMI.cat, family = "binomial", data = NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9305   0.1971   0.3629   0.6521   1.2866  
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          3.06721    1.03457   2.965  0.00303 ** 
## MET.catmod_PA_level                  0.27600    0.09322   2.961  0.00307 ** 
## MET.catHigh_PA_level                -0.48182    0.56247  -0.857  0.39166    
## GenderMale                          -0.19639    0.08777  -2.237  0.02526 *  
## Age20-44                             2.27662    0.20600  11.052  < 2e-16 ***
## Age45-64                             0.44361    0.16216   2.736  0.00623 ** 
## Age65-79                            -0.07388    0.16286  -0.454  0.65010    
## raceOther/Multiracial               -0.15810    0.21398  -0.739  0.46001    
## raceAsian                           -0.42496    0.15310  -2.776  0.00551 ** 
## raceBlack                           -0.07606    0.13380  -0.568  0.56973    
## raceWhite                            0.42752    0.13148   3.252  0.00115 ** 
## marital_statusOther                  0.15193    0.09788   1.552  0.12060    
## marital_statusUnmarried              0.07850    0.15357   0.511  0.60923    
## educationCollege graduate or higher  0.19317    0.13465   1.435  0.15141    
## educationLess than 9th grade        -0.27912    0.17865  -1.562  0.11819    
## BMI.cat>=30                         -2.77099    1.01469  -2.731  0.00632 ** 
## BMI.cat18.5-24.9                    -1.53522    1.01890  -1.507  0.13188    
## BMI.cat25-29.9                      -2.11005    1.01570  -2.077  0.03776 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4084.7  on 4860  degrees of freedom
## Residual deviance: 3528.7  on 4843  degrees of freedom
## AIC: 3564.7
## 
## Number of Fisher Scoring iterations: 6

odds.n.ends(logit3)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##     556.034          17       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   4130  711 4841
##              0      9   11   20
##              Sum 4139  722 4861
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 85.18823
## 
## $`Model sensitivity`
## [1] 0.9978256
## 
## $`Model specificity`
## [1] 0.01523546
## 
## $`Predictor odds ratios and 95% CI`
##                                              OR       2.5 %      97.5 %
## (Intercept)                         21.48191720 4.346606896 389.8093991
## MET.catmod_PA_level                  1.31784229 1.096823712   1.5808109
## MET.catHigh_PA_level                 0.61766069 0.215932043   2.0429546
## GenderMale                           0.82169161 0.691652866   0.9758021
## Age20-44                             9.74367616 6.515689148  14.6269937
## Age45-64                             1.55832799 1.128963096   2.1334311
## Age65-79                             0.92878461 0.671942732   1.2732542
## raceOther/Multiracial                0.85376451 0.565642808   1.3109837
## raceAsian                            0.65379342 0.484516244   0.8833529
## raceBlack                            0.92676089 0.712460769   1.2041144
## raceWhite                            1.53344490 1.184367187   1.9835253
## marital_statusOther                  1.16408228 0.961587529   1.4114958
## marital_statusUnmarried              1.08166097 0.804233503   1.4694569
## educationCollege graduate or higher  1.21308315 0.927696794   1.5734655
## educationLess than 9th grade         0.75645033 0.532384178   1.0730132
## BMI.cat>=30                          0.06260008 0.003516389   0.2906925
## BMI.cat18.5-24.9                     0.21540881 0.012051581   1.0143943
## BMI.cat25-29.9                       0.12123233 0.006803364   0.5648862

rocplot(logit3)

#females subgroup
female_NHANES_COMPLETE<- NHANES_COMPLETE%>%
  filter(Gender == "Female")

female_NHANES_COMPLETE <-female_NHANES_COMPLETE%>%
  dplyr::select( combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)%>%
  mutate(race = recode_factor(race, "Mexican American" = "Hispanic",
                              "Other Hispanic" = "Hispanic",
                              "Other Race - Including Multi-Rac" = "Other/Multiracial",
                              "Non-Hispanic Asian" = "Asian",
                              "Non-Hispanic Black" = "Black",
                              "Non-Hispanic White" = "White"))

reorder_size <- function(x) {
  factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}

#create Mets score factor
female_NHANES_COMPLETE<-female_NHANES_COMPLETE%>%
  mutate(MET.cat = cut(x = combined_MET_Scores_hrs,
                       breaks = c(-Inf,0, 1000,  +Inf),
                       labels = c("No_physical_activity", "mod_PA_level", "High_PA_level"))
  )


summary(female_NHANES_COMPLETE)

##  combined_MET_Scores_hrs      BMI.cat            db_status       Age     
##  Min.   :   0.00         <18.5    :  40   Diabetic    : 348   >=80 :172  
##  1st Qu.:   0.00         >=30     :1127   Non_diabetic:2181   20-44:961  
##  Median :  18.67         18.5-24.9: 635                       45-64:936  
##  Mean   :  72.40         25-29.9  : 727                       65-79:460  
##  3rd Qu.:  60.00                                                         
##  Max.   :8047.20                                                         
##     Gender       marital_status                      education   
##  Female:2529   Married  :1185   9-11th grade              : 258  
##  Male  :   0   Other    : 940   College graduate or higher:2062  
##                Unmarried: 404   Less than 9th grade       : 209  
##                                                                  
##                                                                  
##                                                                  
##                 race                     MET.cat    
##  Hispanic         :588   No_physical_activity: 764  
##  Other/Multiracial:111   mod_PA_level        :1755  
##  Asian            :374   High_PA_level       :  10  
##  Black            :593                              
##  White            :863                              
##

#Female log regression
#model1
Flogit1<- glm(formula= db_status~MET.cat, data= female_NHANES_COMPLETE, family='binomial')
summary(Flogit1)

## 
## Call:
## glm(formula = db_status ~ MET.cat, family = "binomial", data = female_NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0699   0.4997   0.4997   0.4997   0.6681  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           1.49451    0.09352  15.981  < 2e-16 ***
## MET.catmod_PA_level   0.52298    0.11935   4.382 1.18e-05 ***
## MET.catHigh_PA_level -0.10821    0.79608  -0.136    0.892    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2026.2  on 2528  degrees of freedom
## Residual deviance: 2007.2  on 2526  degrees of freedom
## AIC: 2013.2
## 
## Number of Fisher Scoring iterations: 4

odds.n.ends(Flogit1)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##      18.957           2       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   2181  348 2529
##              0      0    0    0
##              Sum 2181  348 2529
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 86.23962
## 
## $`Model sensitivity`
## [1] 1
## 
## $`Model specificity`
## [1] 0
## 
## $`Predictor odds ratios and 95% CI`
##                             OR     2.5 %   97.5 %
## (Intercept)          4.4571429 3.7233636 5.373603
## MET.catmod_PA_level  1.6870488 1.3334127 2.129606
## MET.catHigh_PA_level 0.8974359 0.2219567 5.988737

rocplot(Flogit1)

#Model2 comparing diabetes to physical activity, adjusted for baseline  age, and race
GFlogit2<- glm(formula= db_status~MET.cat+Age+race, data = female_NHANES_COMPLETE, family='binomial'(link=logit))
summary(GFlogit2)

## 
## Call:
## glm(formula = db_status ~ MET.cat + Age + race, family = binomial(link = logit), 
##     data = female_NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7658   0.2718   0.3587   0.6576   0.9188  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            0.99717    0.24781   4.024 5.73e-05 ***
## MET.catmod_PA_level    0.30146    0.12544   2.403   0.0163 *  
## MET.catHigh_PA_level   0.07608    0.82952   0.092   0.9269    
## Age20-44               1.82284    0.27495   6.630 3.36e-11 ***
## Age45-64               0.12272    0.23123   0.531   0.5956    
## Age65-79              -0.24449    0.23804  -1.027   0.3044    
## raceOther/Multiracial  0.15667    0.31906   0.491   0.6234    
## raceAsian              0.15873    0.19432   0.817   0.4140    
## raceBlack             -0.10862    0.16306  -0.666   0.5053    
## raceWhite              0.68128    0.17137   3.975 7.03e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2026.2  on 2528  degrees of freedom
## Residual deviance: 1835.1  on 2519  degrees of freedom
## AIC: 1855.1
## 
## Number of Fisher Scoring iterations: 6

odds.n.ends(GFlogit2)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##     191.042           9       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   2181  348 2529
##              0      0    0    0
##              Sum 2181  348 2529
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 86.23962
## 
## $`Model sensitivity`
## [1] 1
## 
## $`Model specificity`
## [1] 0
## 
## $`Predictor odds ratios and 95% CI`
##                              OR     2.5 %    97.5 %
## (Intercept)           2.7106098 1.6852016  4.461988
## MET.catmod_PA_level   1.3518244 1.0557347  1.726774
## MET.catHigh_PA_level  1.0790502 0.2434876  7.502776
## Age20-44              6.1894148 3.5959868 10.602905
## Age45-64              1.1305687 0.7091528  1.760225
## Age65-79              0.7831011 0.4851937  1.236861
## raceOther/Multiracial 1.1696124 0.6425087  2.263197
## raceAsian             1.1720241 0.8036156  1.723582
## raceBlack             0.8970737 0.6510545  1.234517
## raceWhite             1.9764031 1.4134566  2.769391

rocplot(GFlogit2)

# Model3 comparing diabetes to physical activity,adjusted for baseline  age, race, education level, marital status, BMI
Flogit3<- glm(formula = db_status~ MET.cat+Age+race+marital_status+education+BMI.cat,  data= female_NHANES_COMPLETE, family='binomial')
summary(Flogit3)

## 
## Call:
## glm(formula = db_status ~ MET.cat + Age + race + marital_status + 
##     education + BMI.cat, family = "binomial", data = female_NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.9346   0.2069   0.3708   0.6049   1.2305  
## 
## Coefficients:
##                                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          2.31645    1.06986   2.165  0.03037 *  
## MET.catmod_PA_level                  0.28460    0.12833   2.218  0.02658 *  
## MET.catHigh_PA_level                 0.19391    0.84261   0.230  0.81800    
## Age20-44                             1.91863    0.28723   6.680 2.39e-11 ***
## Age45-64                             0.30374    0.24170   1.257  0.20887    
## Age65-79                            -0.07035    0.24587  -0.286  0.77477    
## raceOther/Multiracial                0.01321    0.33442   0.040  0.96848    
## raceAsian                           -0.33848    0.21335  -1.587  0.11262    
## raceBlack                           -0.18134    0.18242  -0.994  0.32018    
## raceWhite                            0.53760    0.18871   2.849  0.00439 ** 
## marital_statusOther                  0.19643    0.13544   1.450  0.14697    
## marital_statusUnmarried              0.35230    0.22283   1.581  0.11387    
## educationCollege graduate or higher  0.23647    0.19514   1.212  0.22558    
## educationLess than 9th grade        -0.11557    0.25681  -0.450  0.65269    
## BMI.cat>=30                         -2.07309    1.02999  -2.013  0.04414 *  
## BMI.cat18.5-24.9                    -0.70084    1.03852  -0.675  0.49978    
## BMI.cat25-29.9                      -1.35365    1.03262  -1.311  0.18990    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2026.2  on 2528  degrees of freedom
## Residual deviance: 1759.5  on 2512  degrees of freedom
## AIC: 1793.5
## 
## Number of Fisher Scoring iterations: 6

odds.n.ends(Flogit3)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##     266.686          16       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   2180  346 2526
##              0      1    2    3
##              Sum 2181  348 2529
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 86.27916
## 
## $`Model sensitivity`
## [1] 0.9995415
## 
## $`Model specificity`
## [1] 0.005747126
## 
## $`Predictor odds ratios and 95% CI`
##                                             OR       2.5 %      97.5 %
## (Intercept)                         10.1396494 1.853586456 190.2491391
## MET.catmod_PA_level                  1.3292289 1.032208354   1.7075710
## MET.catHigh_PA_level                 1.2139816 0.269279394   8.6217458
## Age20-44                             6.8116082 3.867170851  11.9606666
## Age45-64                             1.3549131 0.833859372   2.1563091
## Age65-79                             0.9320663 0.569249294   1.4962249
## raceOther/Multiracial                1.0133022 0.538681097   2.0142405
## raceAsian                            0.7128524 0.469932181   1.0859099
## raceBlack                            0.8341514 0.582374903   1.1914173
## raceWhite                            1.7118920 1.182266758   2.4794255
## marital_statusOther                  1.2170446 0.934036668   1.5888741
## marital_statusUnmarried              1.4223319 0.928819873   2.2299874
## educationCollege graduate or higher  1.2667717 0.856064923   1.8423145
## educationLess than 9th grade         0.8908548 0.537395820   1.4732595
## BMI.cat>=30                          0.1257958 0.006971679   0.6116315
## BMI.cat18.5-24.9                     0.4961698 0.027273580   2.4797133
## BMI.cat25-29.9                       0.2582960 0.014279399   1.2666657

rocplot(Flogit3)

#Males subgroup

male_NHANES_COMPLETE<- NHANES_COMPLETE%>%
  filter(Gender == "Male")
male_NHANES_COMPLETE <-male_NHANES_COMPLETE%>%
  dplyr::select(combined_MET_Scores_hrs, BMI.cat, db_status, Age, Gender, marital_status, education, race)%>%
  mutate(race = recode_factor(race, "Mexican American" = "Hispanic",
                              "Other Hispanic" = "Hispanic",
                              "Other Race - Including Multi-Rac" = "Other/Multiracial",
                              "Non-Hispanic Asian" = "Asian",
                              "Non-Hispanic Black" = "Black",
                              "Non-Hispanic White" = "White"))

reorder_size <- function(x) {
  factor(x, levels = names(sort(table(x), decreasing = TRUE)))
}

#create Mets score factor
male_NHANES_COMPLETE<-male_NHANES_COMPLETE%>%
  mutate(MET.cat = cut(x = combined_MET_Scores_hrs,
                       breaks = c(-Inf,0, 1000,  +Inf),
                       labels = c("No_physical_activity", "mod_PA_level", "High_PA_level"))
  )


summary(male_NHANES_COMPLETE)

##  combined_MET_Scores_hrs      BMI.cat           db_status       Age     
##  Min.   :   0.00         <18.5    : 29   Diabetic    : 374   >=80 :164  
##  1st Qu.:   5.25         >=30     :928   Non_diabetic:1958   20-44:810  
##  Median :  40.00         18.5-24.9:531                       45-64:872  
##  Mean   : 115.62         25-29.9  :844                       65-79:486  
##  3rd Qu.: 128.33                                                        
##  Max.   :9352.40                                                        
##     Gender       marital_status                      education   
##  Female:   0   Married  :1298   9-11th grade              : 289  
##  Male  :2332   Other    : 620   College graduate or higher:1838  
##                Unmarried: 414   Less than 9th grade       : 205  
##                                                                  
##                                                                  
##                                                                  
##                 race                     MET.cat    
##  Hispanic         :523   No_physical_activity: 495  
##  Other/Multiracial:133   mod_PA_level        :1828  
##  Asian            :319   High_PA_level       :   9  
##  Black            :520                              
##  White            :837                              
##

#Males log regression
#Model1
Mlogit1<- glm(formula= db_status~MET.cat, data= male_NHANES_COMPLETE, family='binomial')
summary(Mlogit1)

## 
## Call:
## glm(formula = db_status ~ MET.cat, family = "binomial", data = male_NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9828   0.5493   0.5493   0.5493   0.9005  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            1.1952     0.1064  11.230  < 2e-16 ***
## MET.catmod_PA_level    0.6197     0.1260   4.919 8.69e-07 ***
## MET.catHigh_PA_level  -0.5021     0.7151  -0.702    0.483    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2053.5  on 2331  degrees of freedom
## Residual deviance: 2028.9  on 2329  degrees of freedom
## AIC: 2034.9
## 
## Number of Fisher Scoring iterations: 4

odds.n.ends(Mlogit1)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##      24.595           2       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   1958  374 2332
##              0      0    0    0
##              Sum 1958  374 2332
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 83.96226
## 
## $`Model sensitivity`
## [1] 1
## 
## $`Model specificity`
## [1] 0
## 
## $`Predictor odds ratios and 95% CI`
##                             OR     2.5 %   97.5 %
## (Intercept)          3.3043478 2.6921112 4.087514
## MET.catmod_PA_level  1.8583470 1.4483247 2.374071
## MET.catHigh_PA_level 0.6052632 0.1570922 2.903094

rocplot(Mlogit1)

#Model2 comparing diabetes to physical activity, adjusted for baseline gender, age, and race
GFlogit2<- glm(formula= db_status~MET.cat+Age+race, data = male_NHANES_COMPLETE, family='binomial'(link=logit))
summary(GFlogit2)

## 
## Call:
## glm(formula = db_status ~ MET.cat + Age + race, family = binomial(link = logit), 
##     data = male_NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8080   0.1979   0.5465   0.7086   1.0621  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)            0.59574    0.23709   2.513 0.011980 *  
## MET.catmod_PA_level    0.32369    0.13319   2.430 0.015083 *  
## MET.catHigh_PA_level  -0.68260    0.75265  -0.907 0.364441    
## Age20-44               2.46116    0.28976   8.494  < 2e-16 ***
## Age45-64               0.36424    0.21275   1.712 0.086887 .  
## Age65-79              -0.23949    0.21397  -1.119 0.263031    
## raceOther/Multiracial -0.03441    0.26963  -0.128 0.898447    
## raceAsian             -0.03493    0.19757  -0.177 0.859664    
## raceBlack              0.33444    0.17529   1.908 0.056397 .  
## raceWhite              0.54224    0.16364   3.314 0.000921 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2053.5  on 2331  degrees of freedom
## Residual deviance: 1812.1  on 2322  degrees of freedom
## AIC: 1832.1
## 
## Number of Fisher Scoring iterations: 6

odds.n.ends(GFlogit2)

## Waiting for profiling to be done...

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##     241.479           9       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   1958  374 2332
##              0      0    0    0
##              Sum 1958  374 2332
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 83.96226
## 
## $`Model sensitivity`
## [1] 1
## 
## $`Model specificity`
## [1] 0
## 
## $`Predictor odds ratios and 95% CI`
##                               OR     2.5 %    97.5 %
## (Intercept)            1.8143723 1.1475199  2.911414
## MET.catmod_PA_level    1.3822225 1.0622607  1.791120
## MET.catHigh_PA_level   0.5053011 0.1188712  2.548143
## Age20-44              11.7184037 6.6940060 20.937722
## Age45-64               1.4394177 0.9404489  2.169314
## Age65-79               0.7870282 0.5129738  1.188886
## raceOther/Multiracial  0.9661738 0.5759888  1.663552
## raceAsian              0.9656715 0.6571821  1.427336
## raceBlack              1.3971609 0.9915275  1.972639
## raceWhite              1.7198511 1.2471225  2.370095

rocplot(GFlogit2)

# Model3 comparing diabetes to physical activity,adjusted for baseline gender, age, race, education level, marital status, BMI
Mlogit3<- glm(formula = db_status~ MET.cat+Age+race+marital_status+education+BMI.cat,  data= male_NHANES_COMPLETE, family='binomial')
summary(Mlogit3)

## 
## Call:
## glm(formula = db_status ~ MET.cat + Age + race + marital_status + 
##     education + BMI.cat, family = "binomial", data = male_NHANES_COMPLETE)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0020   0.1862   0.3613   0.6900   1.3372  
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          15.36324  415.13710   0.037  0.97048    
## MET.catmod_PA_level                   0.25516    0.13671   1.867  0.06197 .  
## MET.catHigh_PA_level                 -1.00689    0.76605  -1.314  0.18872    
## Age20-44                              2.72426    0.30871   8.825  < 2e-16 ***
## Age45-64                              0.57663    0.22213   2.596  0.00944 ** 
## Age65-79                             -0.09800    0.21993  -0.446  0.65588    
## raceOther/Multiracial                -0.28336    0.28490  -0.995  0.31995    
## raceAsian                            -0.57439    0.22462  -2.557  0.01055 *  
## raceBlack                             0.04949    0.19904   0.249  0.80363    
## raceWhite                             0.33614    0.18570   1.810  0.07027 .  
## marital_statusOther                   0.06275    0.14407   0.436  0.66316    
## marital_statusUnmarried              -0.22341    0.21604  -1.034  0.30108    
## educationCollege graduate or higher   0.13011    0.18804   0.692  0.48898    
## educationLess than 9th grade         -0.48030    0.25291  -1.899  0.05756 .  
## BMI.cat>=30                         -15.15300  415.13701  -0.037  0.97088    
## BMI.cat18.5-24.9                    -14.02730  415.13703  -0.034  0.97304    
## BMI.cat25-29.9                      -14.53871  415.13701  -0.035  0.97206    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2053.5  on 2331  degrees of freedom
## Residual deviance: 1750.3  on 2315  degrees of freedom
## AIC: 1784.3
## 
## Number of Fisher Scoring iterations: 15

odds.n.ends(Mlogit3)

## Waiting for profiling to be done...

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## $`Logistic regression model significance`
## Chi-squared        d.f.           p 
##     303.202          16       <.001 
## 
## $`Contingency tables (model fit): frequency predicted`
##                 Number observed
## Number predicted    1    0  Sum
##              1   1948  362 2310
##              0     10   12   22
##              Sum 1958  374 2332
## 
## $`Count R-squared (model fit): percent correctly predicted`
## [1] 84.04803
## 
## $`Model sensitivity`
## [1] 0.9948927
## 
## $`Model specificity`
## [1] 0.03208556
## 
## $`Predictor odds ratios and 95% CI`
##                                               OR        2.5 %       97.5 %
## (Intercept)                         4.700778e+06 5.295678e-03 1.788754e+59
## MET.catmod_PA_level                 1.290674e+00 9.850183e-01 1.683959e+00
## MET.catHigh_PA_level                3.653550e-01 8.420537e-02 1.887279e+00
## Age20-44                            1.524515e+01 8.398596e+00 2.827640e+01
## Age45-64                            1.780032e+00 1.143259e+00 2.735992e+00
## Age65-79                            9.066473e-01 5.845095e-01 1.386669e+00
## raceOther/Multiracial               7.532519e-01 4.350536e-01 1.333785e+00
## raceAsian                           5.630482e-01 3.623655e-01 8.750819e-01
## raceBlack                           1.050737e+00 7.103867e-01 1.551434e+00
## raceWhite                           1.399536e+00 9.703748e-01 2.011059e+00
## marital_statusOther                 1.064759e+00 8.045320e-01 1.415843e+00
## marital_statusUnmarried             7.997835e-01 5.273159e-01 1.232532e+00
## educationCollege graduate or higher 1.138952e+00 7.814319e-01 1.635335e+00
## educationLess than 9th grade        6.185988e-01 3.756626e-01 1.013903e+00
## BMI.cat>=30                         2.625045e-07 3.130560e-59 3.070531e+02
## BMI.cat18.5-24.9                    8.091372e-07 5.355139e-59 8.500521e+02
## BMI.cat25-29.9                      4.851958e-07 5.141191e-59 5.554286e+02

rocplot(Mlogit3)

NHANES_2018_PA_DM

Okiemute Omoru

2022-08-02