# Install NHANES only when it is not already available, so that knitting the
# document does not trigger a fresh download/install on every run.
if (!requireNamespace("NHANES", quietly = TRUE)) {
  install.packages("NHANES")
}
- Do not include an evaluated install.packages() call in your Rmd program. What can happen if you do?
- What is in the “Description” section of the help page for the “NHANES” object?
“This is survey data collected by the US National Center for Health Statistics (NCHS) which has conducted a series of health and nutrition surveys since the early 1960’s. Since 1999 approximately 5,000 individuals of all ages are interviewed in their homes every year and complete the health examination component of the survey. The health examination is conducted in a mobile examination centre (MEC).”
library(NHANES)
library(dplyr)
What can happen if you have an evaluated View() command in your .Rmd file?
# Quartiles, mean and NA count for the Weight column
summary(NHANES[["Weight"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.80 56.10 72.70 70.98 88.90 230.70 78
# Same summary for the Height column
summary(NHANES[["Height"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 83.6 156.8 166.0 161.9 174.5 200.4 353
How many missing values are there for weight and height?
# Two-column working copy of NHANES: Weight first, Height second
WtHt <- select(NHANES, Weight, Height)
# Difference between the means of the first two columns over a row subset.
#
# df: data frame; columns 1 and 2 are the ones averaged.
# i:  row positions handed to slice() (e.g. 1:10).
#
# Rows with an NA in any column are dropped before averaging.
# Returns mean(column 2) - mean(column 1).
clean.mean.dif <- function(df, i) {
  complete_rows <- na.omit(slice(df, i))
  # `[[` pulls each column out as a plain vector so mean() can consume it
  first_col_mean <- mean(complete_rows[[1]])
  second_col_mean <- mean(complete_rows[[2]])
  second_col_mean - first_col_mean
}
# Mean of column 2 (Height) minus mean of column 1 (Weight) over rows 1 to 10
clean.mean.dif(df = WtHt, i = 1:10)
## [1] 87.37
# Two-column working copy of NHANES: Weight first, Gender second
WtGender <- select(NHANES, Weight, Gender)
# Difference in the mean of column 1 between the first two levels of the
# factor stored in column 2, computed over a row subset.
#
# df: data frame with the values to average in column 1 and a factor in
#     column 2.
# i:  row positions handed to slice().
#
# Rows with an NA in any column are dropped first.
# Returns mean(rows at factor level 2) - mean(rows at factor level 1).
mean.diff.factor <- function(df, i) {
  rows <- na.omit(slice(df, i))
  group_levels <- levels(rows[[2]])
  # Split the complete rows by the first and second level of the factor
  first_group <- filter(rows, rows[[2]] == group_levels[1])
  second_group <- filter(rows, rows[[2]] == group_levels[2])
  first_mean <- mean(first_group[[1]])
  second_mean <- mean(second_group[[1]])
  # For Gender, level 2 is males and level 1 is females: males - females
  second_mean - first_mean
}
# Level-2 minus level-1 mean of Weight by Gender, using rows 1 to 10000
mean.diff.factor(df = WtGender, i = 1:10000)
## [1] 9.583432
# Difference in the mean of one column between two chosen levels of a factor
# column, computed over a row subset.
#
# df:             data frame holding the value column and the factor column.
# i:              row positions handed to slice().
# mean_col_index: position of the column whose values are averaged.
# fac_col_index:  position of the factor column that defines the groups.
# level_a:        position, within levels() of the factor column, of the
#                 group whose mean is taken first.
# level_b:        position of the group whose mean is subtracted.
#
# Rows with an NA in any column are dropped first.
# Returns mean(rows at level_a) - mean(rows at level_b).
mean.diff.ab <- function(df, i, mean_col_index, fac_col_index, level_a, level_b) {
  rows <- na.omit(slice(df, i))
  # Read the levels from the factor column the caller selected. They were
  # previously always taken from column 2, which gave wrong (or NaN) results
  # whenever fac_col_index was not 2.
  fac_levels <- levels(rows[[fac_col_index]])
  # Rows belonging to level_a of the factor column
  rows_level_a <- filter(rows, rows[[fac_col_index]] == fac_levels[level_a])
  # Rows belonging to level_b of the factor column
  rows_level_b <- filter(rows, rows[[fac_col_index]] == fac_levels[level_b])
  mean_level_a <- mean(rows_level_a[[mean_col_index]])
  mean_level_b <- mean(rows_level_b[[mean_col_index]])
  # e.g. level_a = 2, level_b = 1 on Gender gives males - females
  mean_level_a - mean_level_b
}
# Average column 1 within the groups defined by the factor in column 2, then
# report the level-2 mean minus the level-1 mean.
mean.diff.ab(
  WtGender, 1:10000,
  mean_col_index = 1, fac_col_index = 2,
  level_a = 2, level_b = 1
)
## [1] 9.583432