Source: Psi Chi R Contest - July

The Psi Chi R Contests are a great way to practice base R. Click Source for more info.

julycontest=read.csv('July_R_Contest.csv')

head(julycontest)

Data processing (level 1)

#Write a script that will filter out participants who don’t have any siblings
julycontest_subset = subset(julycontest,Siblings==0)

Descriptive Statistics (level 2)

#Create a variable called SUmarij by adding YRBS8 + YRBS14

julycontest_subset$SUmarij=julycontest_subset$YRBS8 + julycontest_subset$YRBS14

#Find the average number of times participants have used marijuana

julycontest_mean=mean(julycontest_subset$SUmarij,na.rm=T)

print(julycontest_mean)
## [1] 2.243243
#Transform SUmarij into a binary variable where 0 = never used marijuana, and 1 = has used marijuana

julycontest_subset$mari_binary= ifelse(julycontest_subset$SUmarij=='Never Used',0,'Has Used')

write.csv(julycontest_subset,file='see.csv')

Data visualization (level 3)

#Use cross-tabs to make a table that shows the counts of marijuana use (never used/has used) by gender
table_counts=table(julycontest_subset$mari_binary,julycontest_subset$Gender,useNA='always')

print(table_counts)
##           
##             1  2 <NA>
##   Has Used 20 16    1
##   <NA>      0  0    5

Inferential statistics (level 4)

#Test if there is an association between marijuana use and number of siblings someone has

corr_test=cor(julycontest_subset[,c('Siblings','SUmarij')], use='pairwise.complete.obs')
## Warning in cor(julycontest_subset[, c("Siblings", "SUmarij")], use =
## "pairwise.complete.obs"): the standard deviation is zero
print(corr_test)
##          Siblings SUmarij
## Siblings       NA      NA
## SUmarij        NA       1