library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the NHIS CSV from a hard-coded absolute path into `nhisdata`.
nhisdata <- read.csv(file = "/Volumes/FLASHDRIVE/Data 333/NHIS Data.csv")
# Print the mean of Behav_AlcDaysPerYear_N for each of the two age ranges
# being compared ("18-29" and "50-59"); NA values are ignored when averaging.
nhisdata %>%
  filter(Demo_agerange_C %in% c("18-29", "50-59")) %>%
  group_by(Demo_agerange_C) %>%
  summarise(
    avg_Behav_AlcDaysPerYear_N = mean(Behav_AlcDaysPerYear_N, na.rm = TRUE)
  )
## # A tibble: 2 x 2
## Demo_agerange_C avg_Behav_AlcDaysPerYear_N
## * <chr> <dbl>
## 1 18-29 53.3
## 2 50-59 67.0
# Boxplots of Behav_AlcDaysPerYear_N for the "18-29" and "50-59" age ranges,
# with each group's mean overlaid as a large red point. The legend is hidden
# because the y axis already labels the groups.
nhisdata %>%
  filter(Demo_agerange_C %in% c("18-29", "50-59")) %>%
  ggplot(
    aes(
      x = Behav_AlcDaysPerYear_N,
      y = Demo_agerange_C,
      fill = Demo_agerange_C
    )
  ) +
  geom_boxplot(alpha = 0.7) +
  # `fun` is the current name of the summary-function argument; the old
  # `fun.y` spelling is deprecated and raised a warning on every render.
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 20,
    size = 14,
    color = "red"
  ) +
  theme(legend.position = "none")
## Warning: Removed 53815 rows containing non-finite values (stat_boxplot).
## Warning: Removed 53815 rows containing non-finite values (stat_summary).
# Histograms of Behav_AlcDaysPerYear_N (bins 10 units wide), one panel per
# age range. Fill colour follows the age range; the legend is hidden since the
# facet strips already name each group.
nhisdata %>%
  filter(Demo_agerange_C %in% c("18-29", "50-59")) %>%
  ggplot(aes(x = Behav_AlcDaysPerYear_N, fill = Demo_agerange_C)) +
  geom_histogram(binwidth = 10) +
  facet_wrap(~Demo_agerange_C) +
  theme(legend.position = "none")
## Warning: Removed 53815 rows containing non-finite values (stat_bin).
# Working subset used by the rest of the analysis: only the two age ranges of
# interest and only the grouping and outcome columns.
# NOTE(review): `Nhisdata` differs from the raw `nhisdata` only by its capital
# N; the name is kept because later statements refer to it.
Nhisdata <- nhisdata %>%
  filter(Demo_agerange_C %in% c("18-29", "50-59")) %>%
  select(Demo_agerange_C, Behav_AlcDaysPerYear_N)
# Rows of the working subset that belong to the "18-29" age range.
young_data <- filter(Nhisdata, Demo_agerange_C == "18-29")
# Rows of the working subset that belong to the "50-59" age range.
old_data <- filter(Nhisdata, Demo_agerange_C == "50-59")
# Draw one random sample of 40 values from the 18-29 group and print its mean.
# Missing values are removed BEFORE sampling. Sampling the raw column and then
# calling mean(na.rm = TRUE) lets NA entries use up some of the 40 draws, so
# the mean would be based on fewer than 40 observations.
young_data %>%
  filter(!is.na(Behav_AlcDaysPerYear_N)) %>%
  pull(Behav_AlcDaysPerYear_N) %>%
  sample(40) %>%
  mean()
## [1] 52.86667
# Draw one random sample of 40 values from the 50-59 group and print its mean.
# Missing values are removed BEFORE sampling. Sampling the raw column and then
# calling mean(na.rm = TRUE) lets NA entries use up some of the 40 draws, so
# the mean would be based on fewer than 40 observations.
old_data %>%
  filter(!is.na(Behav_AlcDaysPerYear_N)) %>%
  pull(Behav_AlcDaysPerYear_N) %>%
  sample(40) %>%
  mean()
## [1] 75.39394
# Approximate the sampling distribution of the mean for samples of 40 from the
# 18-29 group: compute 10,000 resampled means and plot them as a red histogram.
# Missing values are removed once, before resampling. Sampling the raw column
# and averaging with na.rm = TRUE lets NA entries be drawn and then discarded,
# so each replicate could average a different number (at most 40) of values.
young_data %>%
  filter(!is.na(Behav_AlcDaysPerYear_N)) %>%
  pull(Behav_AlcDaysPerYear_N) %>%
  {
    # `.` is the vector of non-missing values piped in from the step above.
    replicate(10000, mean(sample(., 40)))
  } %>%
  data.frame(mean = .) %>%
  ggplot() +
  geom_histogram(aes(x = mean), fill = "red", alpha = 0.7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Approximate the sampling distribution of the mean for samples of 40 from the
# 50-59 group: compute 10,000 resampled means and plot them as a light-blue
# histogram.
# Missing values are removed once, before resampling. Sampling the raw column
# and averaging with na.rm = TRUE lets NA entries be drawn and then discarded,
# so each replicate could average a different number (at most 40) of values.
old_data %>%
  filter(!is.na(Behav_AlcDaysPerYear_N)) %>%
  pull(Behav_AlcDaysPerYear_N) %>%
  {
    # `.` is the vector of non-missing values piped in from the step above.
    replicate(10000, mean(sample(., 40)))
  } %>%
  data.frame(mean = .) %>%
  ggplot() +
  geom_histogram(aes(x = mean), fill = "light blue", alpha = 0.7)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Two-sample t-test comparing Behav_AlcDaysPerYear_N between the two age
# ranges kept in `Nhisdata`. No `var.equal` argument is supplied, so t.test()
# uses its default and runs the Welch (unequal-variance) form.
t.test(
  Behav_AlcDaysPerYear_N ~ Demo_agerange_C,
  data = Nhisdata
)
##
## Welch Two Sample t-test
##
## data: Behav_AlcDaysPerYear_N by Demo_agerange_C
## t = -30.903, df = 140111, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -14.60878 -12.86623
## sample estimates:
## mean in group 18-29 mean in group 50-59
## 53.29934 67.03684
```