library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the survey extract into a data frame and print its structure.
# NOTE(review): the path is absolute and points at a mounted volume, so the
# script only runs where that volume is available.
userdata <- read.csv(
  file = "/Volumes/FLASHDRIVE/Data 333/Skills drill 3? data.csv"
)
str(userdata)
## 'data.frame': 57146 obs. of 20 variables:
## $ sexident : chr NA "Straight" "Straight" NA ...
## $ Nervous : int NA 0 2 NA 1 2 0 NA NA NA ...
## $ Hopeless : int NA 0 1 NA 3 1 0 NA NA NA ...
## $ Restless : int NA 0 1 NA 2 1 0 NA NA NA ...
## $ Effort : int NA NA 0 NA 2 2 0 NA NA NA ...
## $ Sad : int NA 0 0 NA 1 1 0 NA NA NA ...
## $ Worthless : int NA 0 0 NA 2 1 0 NA NA NA ...
## $ k6score : int NA NA 4 NA 11 8 0 NA NA NA ...
## $ k6category : chr NA NA "Low Risk" NA ...
## $ marij_month : chr "No" "Yes" "No" "No" ...
## $ cocaine_month : chr "No" "No" "No" "No" ...
## $ crack_month : chr "No" "No" "No" "No" ...
## $ heroin_month : chr "No" "No" "No" "No" ...
## $ hallucinogen_month: chr "No" "No" "No" "No" ...
## $ inhalant_month : chr "No" "No" "No" "No" ...
## $ meth_month : chr "No" "No" "No" "No" ...
## $ painrelieve_month : chr "No" "No" "No" "No" ...
## $ tranq_month : chr "No" "No" "No" "No" ...
## $ stimulant_month : chr "No" "No" "No" "No" ...
## $ sedative_month : chr "No" "No" "No" "No" ...
# Mean K6 score for each answer to the past-month marijuana question.
# Rows whose answer is neither "Yes" nor "No" (including NA) are excluded,
# and missing scores are ignored when averaging.
userdata %>%
  filter(marij_month %in% c("Yes", "No")) %>%
  group_by(marij_month) %>%
  summarise(k6score = mean(k6score, na.rm = TRUE))
## # A tibble: 2 x 2
## marij_month k6score
## * <chr> <dbl>
## 1 No 4.16
## 2 Yes 6.43
# Bar chart of the mean K6 score for respondents who did vs. did not report
# marijuana use in the past month.
# The x axis carries the marijuana-use answer and the bar height is the group
# mean of k6score, so the axis labels describe exactly those quantities
# (the bars are means, not counts).
userdata %>%
  filter(marij_month %in% c("Yes", "No")) %>%
  group_by(marij_month) %>%
  summarize(k6score = mean(k6score, na.rm = TRUE)) %>%
  ggplot(aes(x = marij_month, y = k6score, fill = marij_month)) +
  geom_col() +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "right"
  ) +
  labs(
    x = "Marijuana use within the past 30 days",
    y = "Mean K6 score",
    title = "Monthly Marijuana Use & Risk for Serious Mental Illness",
    subtitle = "Anika Lewis"
  ) +
  scale_fill_manual(
    "Use w/n the past 30 days",
    values = c("Yes" = "dark green", "No" = "black")
  )
# Histogram of K6 scores, one panel per past-month marijuana answer.
# Only rows with a missing k6score are dropped. Calling na.omit() on the whole
# data frame would also discard respondents whose score is present but who
# have an NA in an unrelated column (for example sexident).
# k6score is an integer score, so one bin per score value (binwidth = 1)
# avoids the uneven bars produced by the default 30 bins.
userdata %>%
  filter(marij_month %in% c("Yes", "No"), !is.na(k6score)) %>%
  ggplot(aes(x = k6score, fill = marij_month)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(~marij_month) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "right"
  ) +
  labs(
    x = "Risk for Serious Mental Illness",
    y = "Count",
    title = "Monthly Marijuana Use & Risk for Serious Mental Illness",
    subtitle = "Anika Lewis"
  ) +
  scale_fill_manual(
    "Use w/n the past 30 days",
    values = c("Yes" = "dark green", "No" = "black")
  )
# Split respondents by their past-month marijuana answer.
# Rows where marij_month is NA match neither comparison and are left out of
# both subsets.
yes_data <- filter(userdata, marij_month == "Yes")
no_data <- filter(userdata, marij_month == "No")
# Mean K6 score in one random sample of 40 respondents from each group.
# Missing scores are removed BEFORE sampling so each mean is based on exactly
# 40 observed values. Sampling first and dropping NAs afterwards leaves a
# smaller, randomly sized sample.
# No seed is set, so the printed means change from run to run.
yes_data$k6score %>%
  na.omit() %>%
  sample(40) %>%
  mean()
no_data$k6score %>%
  na.omit() %>%
  sample(40) %>%
  mean()
# Sampling distribution of the mean K6 score for the "Yes" group:
# 10,000 random samples of size 40, one mean per sample.
# Missing scores are removed once, up front, so every sample contains exactly
# 40 observed values (sampling NAs and dropping them afterwards would make the
# sample size vary between replicates).
yes_scores <- yes_data$k6score[!is.na(yes_data$k6score)]
data.frame(mean = replicate(10000, mean(sample(yes_scores, 40)))) %>%
  ggplot() +
  # Same colour as the "Yes" group in the other plots of this script.
  geom_histogram(aes(x = mean), fill = "dark green") +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Sampling distribution of the mean K6 score for the "No" group:
# 10,000 random samples of size 40, one mean per sample.
# Missing scores are removed once, up front, so every sample contains exactly
# 40 observed values (sampling NAs and dropping them afterwards would make the
# sample size vary between replicates).
no_scores <- no_data$k6score[!is.na(no_data$k6score)]
data.frame(mean = replicate(10000, mean(sample(no_scores, 40)))) %>%
  ggplot() +
  # Same colour as the "No" group in the other plots of this script.
  geom_histogram(aes(x = mean), fill = "black") +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Keep only the two columns needed for the test, restricted to respondents
# who answered "Yes" or "No", then compare the group means of k6score.
# t.test() with a formula runs Welch's two-sample test by default
# (see the printed output below).
Userdata <- userdata %>%
  filter(marij_month %in% c("Yes", "No")) %>%
  select(marij_month, k6score)
t.test(k6score ~ marij_month, data = Userdata)
##
## Welch Two Sample t-test
##
## data: k6score by marij_month
## t = -28.099, df = 6078.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.434468 -2.116930
## sample estimates:
## mean in group No mean in group Yes
## 4.155773 6.431472