library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
# Load the NHIS extract into a tibble.
# NOTE(review): this is an absolute path on one user's machine; the script
# will only run elsewhere if the file is placed at the same location.
healthdata <- read_csv(
  file = "/Users/rebeccagibble/Downloads/SD3 NHIS Data.csv"
)
## Parsed with column specification:
## cols(
## year = col_double(),
## Demo_Race = col_character(),
## Demo_sex_C = col_character(),
## Demo_sexorien_C = col_character(),
## Demo_belowpovertyline_B = col_double(),
## Demo_agerange_C = col_character(),
## Demo_marital_C = col_character(),
## Health_SelfRatedHealth_C = col_character(),
## MentalHealth_MentalIllnessK6_N = col_double(),
## Health_BMI_N = col_double(),
## Behav_CigsPerDay_N = col_double(),
## Behav_AlcDaysPerYear_N = col_double(),
## Behav_AlcDaysPerWeek_N = col_double(),
## Behav_BingeDrinkDaysYear_N = col_double()
## )
# Working data set: respondents recorded as "male" or "female", reduced to the
# sex column and the yearly binge-drinking-days column.
sexes_of_interest <- c("male", "female")
data <- healthdata %>%
  filter(Demo_sex_C %in% sexes_of_interest) %>%
  select(Demo_sex_C, Behav_BingeDrinkDaysYear_N)
# Mean number of binge-drinking days per year for each sex, ignoring missing
# responses. `data` already contains only these two columns and only the
# "male"/"female" rows, so it is grouped directly instead of being re-selected
# and re-filtered. The summary column keeps the source column's name.
data %>%
  group_by(Demo_sex_C) %>%
  summarize(
    Behav_BingeDrinkDaysYear_N = mean(Behav_BingeDrinkDaysYear_N, na.rm = TRUE)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 2 x 2
## Demo_sex_C Behav_BingeDrinkDaysYear_N
## <chr> <dbl>
## 1 female 6.99
## 2 male 16.6
# Distribution of yearly binge-drinking days across all retained respondents.
ggplot(data, aes(x = Behav_BingeDrinkDaysYear_N)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 11819 rows containing non-finite values (stat_bin).
# Same histogram as above, drawn in one panel per sex.
ggplot(data, aes(x = Behav_BingeDrinkDaysYear_N)) +
  geom_histogram() +
  facet_wrap(~ Demo_sex_C)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 11819 rows containing non-finite values (stat_bin).
# Split the respondents by sex: `data1` holds the male rows and `data2` the
# female rows. Each subset is assigned only to its own name, so no object is
# ever labelled with the opposite sex.
data1 <- data %>%
  filter(Demo_sex_C == "male")
data2 <- data %>%
  filter(Demo_sex_C == "female")

# Approximate the sampling distribution of the mean of `values`.
#
# Draws `n_draws` random samples of `sample_size` values (without replacement)
# and returns their means as a one-column data frame whose column is `mean`.
# Missing values are dropped *before* sampling so that every mean is based on
# exactly `sample_size` observations; sampling first and removing NAs
# afterwards would give each draw a different, smaller effective sample size.
# Indices are sampled with sample.int() to avoid sample()'s special handling
# of a length-one numeric input.
draw_sample_means <- function(values, n_draws = 10000, sample_size = 40) {
  observed <- values[!is.na(values)]
  stopifnot(length(observed) >= sample_size)
  data.frame(
    mean = replicate(
      n_draws,
      mean(observed[sample.int(length(observed), sample_size)])
    )
  )
}

# 10,000 sample means (n = 40) of yearly binge-drinking days for each sex.
male_data <- draw_sample_means(data1$Behav_BingeDrinkDaysYear_N)
female_data <- draw_sample_means(data2$Behav_BingeDrinkDaysYear_N)
# Overlay the two sampling distributions of the mean: males in blue, females
# in pink. Both layers inherit the shared x mapping from the plot.
ggplot(mapping = aes(x = mean)) +
  geom_histogram(data = male_data, fill = "blue") +
  geom_histogram(data = female_data, fill = "pink")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Two-sample t-test comparing mean yearly binge-drinking days between the
# sexes (t.test() defaults to the Welch unequal-variance form).
t.test(
  Behav_BingeDrinkDaysYear_N ~ Demo_sex_C,
  data = data
)
##
## Welch Two Sample t-test
##
## data: Behav_BingeDrinkDaysYear_N by Demo_sex_C
## t = -16.253, df = 16765, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.750953 -8.436967
## sample estimates:
## mean in group female mean in group male
## 6.987474 16.581434