# Load the NSDUH 2014 extract. The .rda file is expected to restore an object
# named `da36361.0001`, which the rest of the script refers to as `data`.
# The file is read through its full path instead of calling setwd(), so running
# the script does not change the session's working directory.
dataDir <- "C:/Users/knm6/Box Sync/courses/2016 Fall MATH 225/SAMH"
load(file.path(dataDir, "NSDUH-2014-DS0001-data-r.rda"))
data <- da36361.0001
Introduction: give background info, talk about the dataset, give questions of interest that this report will answer.
How many people have ever used marijuana in their lives?
# Share of each MJEVER response among respondents who gave an answer;
# table() leaves NA out of the counts, so the shares sum to 1.
prop.table(table(data[["MJEVER"]]))
##
## (1) Yes (2) No
## 0.4246796 0.5753204
Of 55244 people surveyed, 42.5% said that they had used marijuana at least once in their lives and 57.5% said that they had never used marijuana.
Are there differences among racial groups in the rate of marijuana usage?
# Split respondents by NEWRACE2 category. which() keeps only the rows where the
# comparison is TRUE, so a missing NEWRACE2 value cannot add an all-NA row.
whiteData <- data[which(data$NEWRACE2 == "(1) NonHisp White"), ]
blackData <- data[which(data$NEWRACE2 == "(2) NonHisp Black/Afr Am"), ]
hispData <- data[which(data$NEWRACE2 == "(7) Hispanic"), ]

# Share of each MJEVER response within a group (NA answers are not counted).
whiteMjShares <- prop.table(table(whiteData[["MJEVER"]]))
blackMjShares <- prop.table(table(blackData[["MJEVER"]]))
hispMjShares <- prop.table(table(hispData[["MJEVER"]]))

whiteMjShares
##
## (1) Yes (2) No
## 0.4652009 0.5347991
blackMjShares
##
## (1) Yes (2) No
## 0.4087308 0.5912692
hispMjShares
##
## (1) Yes (2) No
## 0.3263655 0.6736345

# Hispanic-group summaries: number of non-missing answers and yes/no shares.
numAnsHisp <- sum(!is.na(hispData[["MJEVER"]]))
perYesHisp <- hispMjShares[[1]]
perNoHisp <- hispMjShares[[2]]

# One row per group (white, black, Hispanic), one column per response.
# deparse.level = 0 keeps the matrix free of row names.
mjUseByRace <- rbind(whiteMjShares, blackMjShares, hispMjShares,
                     deparse.level = 0)

# mjUseByRace holds proportions; scale by 100 for the plot so the bars agree
# with the "Percent" axis label.
raceColors <- c("Red", "Blue", "Green")
barplot(mjUseByRace * 100, beside = TRUE, names.arg = c("Yes", "No"),
        main = "Have You Ever Tried Marijuana (Even Once)?",
        xlab = "Response", ylab = "Percent", col = raceColors)
legend("topleft", c("White", "Black", "Hispanic"), col = raceColors, pch = 15)
Among our surveyed subjects, whites had the highest rate of marijuana usage, with 46.5% reporting that they had ever used marijuana, compared to 40.9% of surveyed black individuals and 32.6% of surveyed Hispanic individuals.
Among those who have ever used, when did they start?
# Respondents who reported ever using marijuana. which() drops rows whose
# MJEVER is missing instead of turning them into all-NA rows.
userData <- data[which(data$MJEVER == "(1) Yes"), ]

# Distribution of MJAGE (age at first marijuana use) among those respondents,
# with explicit labels in place of the deparsed expression hist() would show.
hist(userData[["MJAGE"]], main = "Age of First Marijuana Use",
     xlab = "Age", ylab = "Number of Responses")
mean(userData[["MJAGE"]], na.rm = TRUE)
## [1] 17.06404
sd(userData[["MJAGE"]], na.rm = TRUE)
## [1] 4.573829
One question of interest is whether frequent users of marijuana report a different level of overall health than non-users. We will define frequent users as those who report using marijuana at least 50 days in the past 12 months and non-users as those who report that they have never used marijuana (even once).
# Comparison groups for self-described overall health:
#   non-users      - MJEVER is "(2) No"
#   frequent users - MJYRTOT of at least 50; >= (not >) so that a respondent
#                    reporting exactly 50 is included, matching the report's
#                    "at least 50 days" definition
# which() keeps only rows where the condition is TRUE, so missing values do
# not produce all-NA rows.
nonUserData <- data[which(data$MJEVER == "(2) No"), ]
freqUsersData <- data[which(data$MJYRTOT >= 50), ]

# Share of each HEALTH response within a group (NA answers are not counted).
freqHealthShares <- prop.table(table(freqUsersData$HEALTH))
nonUserHealthShares <- prop.table(table(nonUserData$HEALTH))
barplot(freqHealthShares)
barplot(nonUserHealthShares)

# Row 1 = frequent users, row 2 = non-users; deparse.level = 0 keeps the
# matrix free of row names.
health <- rbind(freqHealthShares, nonUserHealthShares, deparse.level = 0)
barplot(health, beside = TRUE, col = c("green", "red"),
        names.arg = c("Excellent", "Very Good", "Good", "Fair", "Poor"),
        xlab = "Self-described overall health")
legend("topleft", c("Freq Users", "Non Users"), col = c("Green", "Red"),
       bty = "n", pch = 15)
Our next question of interest was whether there is an association between the age at which one first tries cigarettes and the age at which one first tries marijuana. To investigate this question, we divided the data into two groups: those who had first tried cigarettes between the ages of 16 and 25 and those who had never tried cigarettes or who had first tried cigarettes after 25.
# Two cigarette-history groups:
#   triedCigBetween16and25 - CIGTRY between 16 and 25 inclusive
#   triedCigAfter25orNever - CIGTRY above 25, or CIGEVER is "(2) No"
# which() keeps only rows where the condition is TRUE; rows where it evaluates
# to NA are dropped rather than added as all-NA rows.
triedCigBetween16and25 <- data[which(data$CIGTRY >= 16 & data$CIGTRY <= 25), ]
triedCigAfter25orNever <- data[which(data$CIGTRY > 25 | data$CIGEVER == "(2) No"), ]

# Share of each MJEVER response within a group (NA answers are not counted).
earlyMjShares <- prop.table(table(triedCigBetween16and25$MJEVER))
lateMjShares <- prop.table(table(triedCigAfter25orNever$MJEVER))
earlyMjShares
##
## (1) Yes (2) No
## 0.6150749 0.3849251
lateMjShares
##
## (1) Yes (2) No
## 0.149603 0.850397

# Row 1 = tried cigarettes at 16-25, row 2 = after 25 or never;
# deparse.level = 0 keeps the matrix free of row names.
combinedTable <- rbind(earlyMjShares, lateMjShares, deparse.level = 0)
percMJearly <- combinedTable[1, 1] * 100
percMJlate <- combinedTable[2, 1] * 100

# combinedTable holds proportions; scale by 100 for the plot so the bars agree
# with the "Percent" axis label.
cigGroupColors <- c("Red", "Blue")
cigGroupLabels <- c("First Tried Cigarettes Between 16 and 25",
                    "Tried Cigarettes After 25 or Never")
barplot(combinedTable * 100, beside = TRUE, names.arg = c("Yes", "No"),
        main = "Have You Ever Tried Marijuana (Even Once)?",
        xlab = "Response", ylab = "Percent", col = cigGroupColors)
legend("topleft", cigGroupLabels, col = cigGroupColors, pch = 15)

# Age of first marijuana use (MJAGE) within each group.
hist(triedCigBetween16and25$MJAGE,
     main = "Age of First Marijuana Use for Early Cigarettes Users",
     xlab = "Age", ylab = "Number of Responses")
hist(triedCigAfter25orNever$MJAGE,
     main = "Age of First Marijuana Use for Late Cigarette Users/Non-Users",
     xlab = "Age", ylab = "Number of Responses")
median(triedCigBetween16and25$MJAGE, na.rm = TRUE)
## [1] 18
mean(triedCigBetween16and25$MJAGE, na.rm = TRUE)
## [1] 18.31426
sd(triedCigBetween16and25$MJAGE, na.rm = TRUE)
## [1] 4.239675
median(triedCigAfter25orNever$MJAGE, na.rm = TRUE)
## [1] 17
mean(triedCigAfter25orNever$MJAGE, na.rm = TRUE)
## [1] 18.02425
sd(triedCigAfter25orNever$MJAGE, na.rm = TRUE)
## [1] 5.093176
Among the surveyed individuals who had first tried cigarettes between 16 and 25 (hereafter referred to as “early cigarette users”), approximately 61.5% had used marijuana at least once, compared to 15% of those who either had never tried cigarettes or who first tried cigarettes after 25 (hereafter referred to as “late cigarette users/non-users”). We then examined the age at which those individuals who had tried marijuana at least once first used marijuana. The ages of first use for early cigarette users and late users/non-users are displayed in histograms above. Among early cigarette users who tried marijuana at least once, the mean age of first use was 18.3 years with a standard deviation of 4.2 years. Among late cigarette users/non-users who tried marijuana at least once, the mean age of first use was 18 years with a standard deviation of 5.1 years.
We observed a large difference in the rate of marijuana use between the two groups, with 61.5% of surveyed early cigarette users having used marijuana at least once, which was roughly 4.1 times the rate of having used marijuana at least once among the surveyed late cigarette users/non-users (15%). However, among those who did try marijuana at least once, the age at which they first did so was similar between the early and late/non-users of cigarettes, although the late/non-users group had a more variable age of first marijuana use. Below, we illustrate the similarity by overlaying relative frequency histograms for the age of first use for the two groups.
# Overlay the age-of-first-marijuana-use (MJAGE) distributions of the two
# cigarette-history groups as semi-transparent density histograms.
earlyColor <- rgb(red = 1, green = 0, blue = 0, alpha = 0.5)
lateColor <- rgb(red = 0, green = 0, blue = 1, alpha = 0.5)

# Bin both groups on the same breaks (taken from the pooled ages) so that the
# overlaid bars line up and are directly comparable.
sharedBreaks <- hist(c(triedCigBetween16and25$MJAGE,
                       triedCigAfter25orNever$MJAGE),
                     plot = FALSE)$breaks
earlyHist <- hist(triedCigBetween16and25$MJAGE, breaks = sharedBreaks,
                  plot = FALSE)
lateHist <- hist(triedCigAfter25orNever$MJAGE, breaks = sharedBreaks,
                 plot = FALSE)

# freq = FALSE draws densities (bar areas sum to 1), so the axis is labelled
# "Density"; ylim spans both groups so the second histogram is not clipped.
plot(earlyHist, freq = FALSE,
     ylim = c(0, max(earlyHist$density, lateHist$density)),
     main = "Age of First Marijuana Use by Cigarette History",
     xlab = "Age", ylab = "Density", col = earlyColor)
plot(lateHist, freq = FALSE, add = TRUE, col = lateColor)
legend("topright",
       c("First Tried Cigarettes Between 16 and 25",
         "Tried Cigarettes After 25 or Never"),
       col = c(earlyColor, lateColor), pch = 15)