# Source: Psi Chi R Contest - July
# The Psi Chi R Contests are a great way to practice base R. Click Source for more info.
# Load the contest data; the CSV is expected in the working directory.
julycontest <- read.csv("July_R_Contest.csv")
head(julycontest)

# Filter out participants who don't have any siblings, i.e. keep only the
# participants with at least one sibling. The earlier condition
# `Siblings == 0` kept exactly the rows that were meant to be removed, which
# left Siblings constant and made the correlation below undefined.
# subset() also drops rows whose Siblings value is NA.
julycontest_subset <- subset(julycontest, Siblings > 0)

# Create a variable called SUmarij by adding YRBS8 + YRBS14.
# A missing response on either item makes the sum NA.
julycontest_subset$SUmarij <-
  julycontest_subset$YRBS8 + julycontest_subset$YRBS14

# Find the average marijuana-use score, ignoring missing sums.
julycontest_mean <- mean(julycontest_subset$SUmarij, na.rm = TRUE)
print(julycontest_mean)

# Transform SUmarij into a binary variable where 0 = never used marijuana
# and 1 = has used marijuana. SUmarij is a numeric sum, so comparing it to
# the string "Never Used" never matched; compare against the numeric
# "never used" score instead. Missing sums stay NA.
# NOTE(review): assumes a summed score of 0 means "never used" -- confirm
# against the YRBS codebook (if each item is coded from 1, this should be 2).
never_used_score <- 0
julycontest_subset$mari_binary <-
  as.integer(julycontest_subset$SUmarij > never_used_score)

# Write the working subset to disk so the derived columns can be inspected.
write.csv(julycontest_subset, file = "see.csv")

# Use cross-tabs to show the counts of marijuana use (never used / has used)
# by gender. The factor fixes both row labels so a category with zero
# participants still appears; useNA = "always" keeps the missing-value
# row and column.
mari_use <- factor(
  julycontest_subset$mari_binary,
  levels = c(0, 1),
  labels = c("Never Used", "Has Used")
)
table_counts <- table(
  Use = mari_use,
  Gender = julycontest_subset$Gender,
  useNA = "always"
)
print(table_counts)

# Test if there is an association between marijuana use and the number of
# siblings someone has. cor() reports only the coefficient matrix, so
# cor.test() is added to obtain the test statistic and p-value.
corr_test <- cor(
  julycontest_subset[, c("Siblings", "SUmarij")],
  use = "pairwise.complete.obs"
)
print(corr_test)

sibling_use_test <- cor.test(
  julycontest_subset$Siblings,
  julycontest_subset$SUmarij
)
print(sibling_use_test)

# Output recorded from the earlier run is intentionally omitted: it reflected
# the inverted sibling filter and the string comparison (every participant
# labelled "Has Used", and a zero-standard-deviation warning from cor()).
# Re-run the script to regenerate the results.