# 2022-04-15 Project

# Session setup: one statement per line (the original ran three statements
# together on a single line, which R cannot parse).
#
# NOTE(review): rm(list = ls()) deletes every object in the caller's global
# environment. It is kept here to preserve the script's behaviour, but
# running the script in a fresh R session is the safer way to get a clean
# workspace.
rm(list = ls())

library(nhanesA) # provides nhanesTranslate(), used below
library(dplyr)   # provides bind_rows(), full_join(), %>%

# Download the required survey cycles for a component file

#' Download an NHANES component file for one or more survey cycles.
#'
#' Builds one URL per survey cycle of the form
#' `<base_url><cycle>/<fileprefix><suffix>.XPT`, downloads each file to a
#' temporary location, reads it with `foreign::read.xport()`, and row-binds
#' the per-cycle data frames into a single data frame.
#'
#' @param fileprefix Component file prefix as a single string, e.g. "DEMO".
#' @param cycles Character vector of survey cycles, e.g. "2015-2016".
#'   Defaults to the script-level variable `yrs`, which is what the original
#'   implementation read implicitly.
#' @param suffixes Character vector of file suffixes matching `cycles`
#'   element by element, e.g. "_I". Defaults to the script-level variable
#'   `letters` (note: that script variable shadows `base::letters`).
#' @param base_url URL prefix placed in front of the cycle.
#' @return A data frame with the rows of all requested cycles stacked.
downloadNHANES <- function(fileprefix,
                           cycles = yrs,
                           suffixes = letters,
                           base_url = "https://wwwn.cdc.gov/Nchs/Nhanes/") {
  # Validate before touching the network so that mistakes fail fast.
  if (!is.character(fileprefix) || length(fileprefix) != 1L) {
    stop("`fileprefix` must be a single string.", call. = FALSE)
  }
  if (length(cycles) == 0L || length(cycles) != length(suffixes)) {
    stop(
      "`cycles` and `suffixes` must be non-empty and of equal length.",
      call. = FALSE
    )
  }

  print(fileprefix)

  urls <- paste0(base_url, cycles, "/", fileprefix, suffixes, ".XPT")

  # One download per cycle; each temporary file is removed as soon as it
  # has been read (the original left its temp file behind).
  frames <- lapply(urls, function(url) {
    tf <- tempfile(fileext = ".XPT")
    on.exit(unlink(tf), add = TRUE)
    download.file(url, tf, mode = "wb") # "wb": XPT is a binary format
    foreign::read.xport(tf)
  })

  dplyr::bind_rows(frames)
}

# Specify the survey cycles required, with the corresponding file suffixes

# Survey cycle(s) to download and the file suffix that belongs to each one.
# The two vectors are paired element by element when URLs are built.
yrs <- c("2015-2016")

# NOTE: this assignment shadows base R's built-in `letters` constant for the
# rest of the session. The name is kept because downloadNHANES() reads it.
letters <- c("_I")

# Demographics data

# Download the demographics component and keep three variables:
#   SEQN     - respondent sequence number
#   RIAGENDR - gender
#   RIDAGEYR - age in years at screening
dd <- downloadNHANES("DEMO")
dd.vars <- c("SEQN", "RIAGENDR", "RIDAGEYR")

# Translate the selected columns with nhanesTranslate(), then give them
# short analysis names.
dd.data <- nhanesTranslate("DEMO_I", dd.vars, data = dd[, dd.vars])
names(dd.data) <- c("id", "gender", "age")

# Quick checks: size of the raw table and distributions including NAs.
dim(dd)
table(dd.data$gender, useNA = "always")
table(dd.data$age, useNA = "always")

# Alcohol use data

# Download the alcohol-use component and keep two variables:
#   SEQN    - respondent sequence number
#   ALQ120U - unit of measure for alcohol use
ALQ <- downloadNHANES("ALQ")
ALQ.vars <- c("SEQN", "ALQ120U")

ALQ.data <- nhanesTranslate("ALQ_I", ALQ.vars, data = ALQ[, ALQ.vars])

# "Alcohol use" contains a space, so later code must refer to the column
# with backticks (`Alcohol use`).
names(ALQ.data) <- c("id", "Alcohol use")

# BMI data

# Download the body-measures component and keep two variables:
#   SEQN   - respondent sequence number
#   BMXBMI - Body Mass Index (kg/m**2)
BMI <- downloadNHANES("BMX")
BMI.vars <- c("SEQN", "BMXBMI")

BMI.data <- nhanesTranslate("BMX_I", BMI.vars, data = BMI[, BMI.vars])
names(BMI.data) <- c("id", "BMI")

summary(BMI.data)

# Merge data

# Install tidyverse only when it is missing; the original reinstalled it
# on every run of the script.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Full outer join of the three tables on the respondent id, so respondents
# missing from any one component are kept (with NA in its columns).
merged.data <- list(dd.data, BMI.data, ALQ.data) %>%
  purrr::reduce(full_join, by = "id")

dim(merged.data)

# Persist the merged table for later sessions.
save(merged.data, file = "2901_nhanesdata.RData")

merged.data
summary(merged.data)

# Identify the variables that are integers

# Named logical vector: TRUE for every column of merged.data stored as an
# integer. vapply() guarantees a logical result regardless of the input.
int.count <- vapply(merged.data, is.integer, logical(1))

# Show only the integer columns.
int.count[int.count]

# Check for and delete missing values

# library() fails loudly if data.table is not installed; require() would
# only return FALSE and let the script fail later.
library(data.table)

data1516 <- data.table(merged.data)

# Row-wise completeness flags and the total NA count before cleaning.
complete.cases(data1516)
sum(is.na(data1516))

# Drop every row that has at least one missing value.
data1516 <- na.omit(data1516)

# Confirm nothing is missing any more, then inspect the cleaned table.
sum(is.na(data1516))
str(data1516)
summary(data1516)

# Delete outliers: fit the model and test for outliers

library(car) # provides outlierTest()

# One-way ANOVA of BMI by alcohol-use group. The column name contains a
# space, so it must be backtick-quoted inside the formula. The attach()
# calls of the original are dropped: every call below passes the data
# explicitly, so nothing relied on the attached copy.
fit <- aov(BMI ~ `Alcohol use`, data = data1516)
summary(fit)

# Mean BMI per alcohol-use group.
aggregate(data1516$BMI, by = list(data1516$`Alcohol use`), FUN = mean)

# Studentized residuals of the observations reported by the outlier test.
outlierTest(fit)$rstudent

# Assessing outliers: remove the flagged observations

# The names of the rstudent vector identify the observations reported by
# outlierTest(); convert them to numeric row positions.
num_outliers <- c(names(outlierTest(fit)$rstudent))
num_outliers <- as.numeric(num_outliers)

data1516 <- data.table(data1516)

# In data.table, a `!` prefix on a numeric `i` selects all rows EXCEPT the
# listed positions. Despite its name, data1516_outliers therefore holds the
# data with the reported outliers removed.
data1516_outliers <- data1516[!num_outliers, ]

# Normality test

library(car) # provides qqPlot()

# Q-Q plot of the residuals of BMI modelled on alcohol-use group.
# TRUE is spelled out because T is an ordinary, reassignable variable.
qqPlot(
  lm(BMI ~ `Alcohol use`, data = data1516_outliers),
  simulate = TRUE,
  main = "Q-Q Plot"
)

# Shapiro-Wilk normality test of BMI within each alcohol-use group.
# NOTE(review): shapiro.test() only accepts between 3 and 5000 values, so
# confirm every group falls in that range.
with(data1516_outliers, tapply(BMI, `Alcohol use`, shapiro.test))

# Test of homogeneity of variance

library(car) # provides leveneTest()

# Levene's test for equal BMI variance across alcohol-use groups. The
# column name contains a space and must be backtick-quoted in the formula.
leveneTest(BMI ~ `Alcohol use`, data = data1516_outliers)

# Kruskal-Wallis test

# Kruskal-Wallis rank-sum test of BMI across alcohol-use groups. The
# column name contains a space and must be backtick-quoted in the formula.
kruskal.test(BMI ~ `Alcohol use`, data = data1516_outliers)

# Adjustment of p-values (pairwise comparisons)

# Pairwise Wilcoxon rank-sum tests of BMI between alcohol-use groups, with
# the p-value adjustment method given as "bonf". The column name contains a
# space and must be backtick-quoted.
with(
  data1516_outliers,
  pairwise.wilcox.test(BMI, `Alcohol use`, p.adjust.method = "bonf")
)