url_drugs <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv"
drug <- read.csv(url_drugs, stringsAsFactors = F)
Renaming some columns to make separating with “.” easier:
drug2 <- drug %>%
rename("pain releiver.use" = pain.releiver.use,
"pain releiver.frequency" = pain.releiver.frequency)
knitr::kable(head(drug2, 10))
age | n | alcohol.use | alcohol.frequency | marijuana.use | marijuana.frequency | cocaine.use | cocaine.frequency | crack.use | crack.frequency | heroin.use | heroin.frequency | hallucinogen.use | hallucinogen.frequency | inhalant.use | inhalant.frequency | pain releiver.use | pain releiver.frequency | oxycontin.use | oxycontin.frequency | tranquilizer.use | tranquilizer.frequency | stimulant.use | stimulant.frequency | meth.use | meth.frequency | sedative.use | sedative.frequency |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12 | 2798 | 3.9 | 3 | 1.1 | 4 | 0.1 | 5.0 | 0.0 | - | 0.1 | 35.5 | 0.2 | 52 | 1.6 | 19.0 | 2.0 | 36 | 0.1 | 24.5 | 0.2 | 52.0 | 0.2 | 2.0 | 0.0 | - | 0.2 | 13.0 |
13 | 2757 | 8.5 | 6 | 3.4 | 15 | 0.1 | 1.0 | 0.0 | 3.0 | 0.0 | - | 0.6 | 6 | 2.5 | 12.0 | 2.4 | 14 | 0.1 | 41.0 | 0.3 | 25.5 | 0.3 | 4.0 | 0.1 | 5.0 | 0.1 | 19.0 |
14 | 2792 | 18.1 | 5 | 8.7 | 24 | 0.1 | 5.5 | 0.0 | - | 0.1 | 2.0 | 1.6 | 3 | 2.6 | 5.0 | 3.9 | 12 | 0.4 | 4.5 | 0.9 | 5.0 | 0.8 | 12.0 | 0.1 | 24.0 | 0.2 | 16.5 |
15 | 2956 | 29.2 | 6 | 14.5 | 25 | 0.5 | 4.0 | 0.1 | 9.5 | 0.2 | 1.0 | 2.1 | 4 | 2.5 | 5.5 | 5.5 | 10 | 0.8 | 3.0 | 2.0 | 4.5 | 1.5 | 6.0 | 0.3 | 10.5 | 0.4 | 30.0 |
16 | 3058 | 40.1 | 10 | 22.5 | 30 | 1.0 | 7.0 | 0.0 | 1.0 | 0.1 | 66.5 | 3.4 | 3 | 3.0 | 3.0 | 6.2 | 7 | 1.1 | 4.0 | 2.4 | 11.0 | 1.8 | 9.5 | 0.3 | 36.0 | 0.2 | 3.0 |
17 | 3038 | 49.3 | 13 | 28.0 | 36 | 2.0 | 5.0 | 0.1 | 21.0 | 0.1 | 64.0 | 4.8 | 3 | 2.0 | 4.0 | 8.5 | 9 | 1.4 | 6.0 | 3.5 | 7.0 | 2.8 | 9.0 | 0.6 | 48.0 | 0.5 | 6.5 |
18 | 2469 | 58.7 | 24 | 33.7 | 52 | 3.2 | 5.0 | 0.4 | 10.0 | 0.4 | 46.0 | 7.0 | 4 | 1.8 | 4.0 | 9.2 | 12 | 1.7 | 7.0 | 4.9 | 12.0 | 3.0 | 8.0 | 0.5 | 12.0 | 0.4 | 10.0 |
19 | 2223 | 64.6 | 36 | 33.4 | 60 | 4.1 | 5.5 | 0.5 | 2.0 | 0.5 | 180.0 | 8.6 | 3 | 1.4 | 3.0 | 9.4 | 12 | 1.5 | 7.5 | 4.2 | 4.5 | 3.3 | 6.0 | 0.4 | 105.0 | 0.3 | 6.0 |
20 | 2271 | 69.7 | 48 | 34.0 | 60 | 4.9 | 8.0 | 0.6 | 5.0 | 0.9 | 45.0 | 7.4 | 2 | 1.5 | 4.0 | 10.0 | 10 | 1.7 | 12.0 | 5.4 | 10.0 | 4.0 | 12.0 | 0.9 | 12.0 | 0.5 | 4.0 |
21 | 2354 | 83.2 | 52 | 33.0 | 52 | 4.8 | 5.0 | 0.5 | 17.0 | 0.6 | 30.0 | 6.3 | 4 | 1.4 | 2.0 | 9.0 | 15 | 1.3 | 13.5 | 3.9 | 7.0 | 4.1 | 10.0 | 0.6 | 2.0 | 0.3 | 9.0 |
Gathering columns, under key and values:
drug_gathered <- suppressWarnings(drug2 %>%
gather(key, value, -age, -n) %>%
arrange(age))
head(drug_gathered, 10)
## age n key value
## 1 12 2798 alcohol.use 3.9
## 2 12 2798 alcohol.frequency 3
## 3 12 2798 marijuana.use 1.1
## 4 12 2798 marijuana.frequency 4
## 5 12 2798 cocaine.use 0.1
## 6 12 2798 cocaine.frequency 5.0
## 7 12 2798 crack.use 0
## 8 12 2798 crack.frequency -
## 9 12 2798 heroin.use 0.1
## 10 12 2798 heroin.frequency 35.5
Separating key into subs and variables before spreading variables into 2 columns:
drug_gathered2 <- drug_gathered %>%
separate(key, into = c("subs", "variables"), sep = "\\." )
head(drug_gathered2,10)
## age n subs variables value
## 1 12 2798 alcohol use 3.9
## 2 12 2798 alcohol frequency 3
## 3 12 2798 marijuana use 1.1
## 4 12 2798 marijuana frequency 4
## 5 12 2798 cocaine use 0.1
## 6 12 2798 cocaine frequency 5.0
## 7 12 2798 crack use 0
## 8 12 2798 crack frequency -
## 9 12 2798 heroin use 0.1
## 10 12 2798 heroin frequency 35.5
Spreading variables into use and frequency columns:
drug_gathered3 <- drug_gathered2 %>%
spread(variables, value)
knitr::kable(head(drug_gathered3, 15))
age | n | subs | frequency | use |
---|---|---|---|---|
12 | 2798 | alcohol | 3 | 3.9 |
12 | 2798 | cocaine | 5.0 | 0.1 |
12 | 2798 | crack | - | 0 |
12 | 2798 | hallucinogen | 52 | 0.2 |
12 | 2798 | heroin | 35.5 | 0.1 |
12 | 2798 | inhalant | 19.0 | 1.6 |
12 | 2798 | marijuana | 4 | 1.1 |
12 | 2798 | meth | - | 0 |
12 | 2798 | oxycontin | 24.5 | 0.1 |
12 | 2798 | pain releiver | 36 | 2 |
12 | 2798 | sedative | 13 | 0.2 |
12 | 2798 | stimulant | 2 | 0.2 |
12 | 2798 | tranquilizer | 52 | 0.2 |
13 | 2757 | alcohol | 6 | 8.5 |
13 | 2757 | cocaine | 1.0 | 0.1 |
Table alcohol use:
drug_gathered3 <- suppressWarnings(drug_gathered3 %>%
mutate(use = as.numeric(use),
frequency = as.numeric(frequency)))
alcohol <- drug_gathered3 %>%
filter(subs=="alcohol") %>%
select(-subs)
head(alcohol)
## age n frequency use
## 1 12 2798 3 3.9
## 2 13 2757 6 8.5
## 3 14 2792 5 18.1
## 4 15 2956 6 29.2
## 5 16 3058 10 40.1
## 6 17 3038 13 49.3
Association between age and alcohol usage - frequency and age:
ggplot(alcohol) + geom_point(aes(age, y = use), color = "blue") + labs(x = "Age", y = "Median Use")
ggplot(alcohol) + geom_point(aes(age, y = frequency), color = "red") + labs(x = "Age", y = "Freuency")
Table for 16 year old drug users:
age_16 <- drug_gathered3 %>%
filter(age == 16 & subs != "alcohol") %>%
select(-age)
head(age_16)
## n subs frequency use
## 1 3058 cocaine 7.0 1.0
## 2 3058 crack 1.0 0.0
## 3 3058 hallucinogen 3.0 3.4
## 4 3058 heroin 66.5 0.1
## 5 3058 inhalant 3.0 3.0
## 6 3058 marijuana 30.0 22.5
Bar plot for median drug use:
ggplot(age_16, aes(reorder(subs,-use),use), ylim = c(0,50) ) +
geom_bar(stat = "identity", color = "orange", fill = "white") +
labs(x = "Median Drug Use", y = "Drugs") +
coord_fixed(ratio = .2) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))