url_drugs <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv"
drug <- read.csv(url_drugs, stringsAsFactors = F)


Renaming some columns to make separating with “.” easier:

drug2 <- drug %>%
  rename("pain releiver.use" = pain.releiver.use,
         "pain releiver.frequency" = pain.releiver.frequency)
knitr::kable(head(drug2, 10))
age n alcohol.use alcohol.frequency marijuana.use marijuana.frequency cocaine.use cocaine.frequency crack.use crack.frequency heroin.use heroin.frequency hallucinogen.use hallucinogen.frequency inhalant.use inhalant.frequency pain releiver.use pain releiver.frequency oxycontin.use oxycontin.frequency tranquilizer.use tranquilizer.frequency stimulant.use stimulant.frequency meth.use meth.frequency sedative.use sedative.frequency
12 2798 3.9 3 1.1 4 0.1 5.0 0.0 - 0.1 35.5 0.2 52 1.6 19.0 2.0 36 0.1 24.5 0.2 52.0 0.2 2.0 0.0 - 0.2 13.0
13 2757 8.5 6 3.4 15 0.1 1.0 0.0 3.0 0.0 - 0.6 6 2.5 12.0 2.4 14 0.1 41.0 0.3 25.5 0.3 4.0 0.1 5.0 0.1 19.0
14 2792 18.1 5 8.7 24 0.1 5.5 0.0 - 0.1 2.0 1.6 3 2.6 5.0 3.9 12 0.4 4.5 0.9 5.0 0.8 12.0 0.1 24.0 0.2 16.5
15 2956 29.2 6 14.5 25 0.5 4.0 0.1 9.5 0.2 1.0 2.1 4 2.5 5.5 5.5 10 0.8 3.0 2.0 4.5 1.5 6.0 0.3 10.5 0.4 30.0
16 3058 40.1 10 22.5 30 1.0 7.0 0.0 1.0 0.1 66.5 3.4 3 3.0 3.0 6.2 7 1.1 4.0 2.4 11.0 1.8 9.5 0.3 36.0 0.2 3.0
17 3038 49.3 13 28.0 36 2.0 5.0 0.1 21.0 0.1 64.0 4.8 3 2.0 4.0 8.5 9 1.4 6.0 3.5 7.0 2.8 9.0 0.6 48.0 0.5 6.5
18 2469 58.7 24 33.7 52 3.2 5.0 0.4 10.0 0.4 46.0 7.0 4 1.8 4.0 9.2 12 1.7 7.0 4.9 12.0 3.0 8.0 0.5 12.0 0.4 10.0
19 2223 64.6 36 33.4 60 4.1 5.5 0.5 2.0 0.5 180.0 8.6 3 1.4 3.0 9.4 12 1.5 7.5 4.2 4.5 3.3 6.0 0.4 105.0 0.3 6.0
20 2271 69.7 48 34.0 60 4.9 8.0 0.6 5.0 0.9 45.0 7.4 2 1.5 4.0 10.0 10 1.7 12.0 5.4 10.0 4.0 12.0 0.9 12.0 0.5 4.0
21 2354 83.2 52 33.0 52 4.8 5.0 0.5 17.0 0.6 30.0 6.3 4 1.4 2.0 9.0 15 1.3 13.5 3.9 7.0 4.1 10.0 0.6 2.0 0.3 9.0


Gathering columns, under key and values:

drug_gathered <- suppressWarnings(drug2 %>% 
  gather(key, value, -age, -n) %>% 
  arrange(age))
  

head(drug_gathered, 10)
##    age    n                 key value
## 1   12 2798         alcohol.use   3.9
## 2   12 2798   alcohol.frequency     3
## 3   12 2798       marijuana.use   1.1
## 4   12 2798 marijuana.frequency     4
## 5   12 2798         cocaine.use   0.1
## 6   12 2798   cocaine.frequency   5.0
## 7   12 2798           crack.use     0
## 8   12 2798     crack.frequency     -
## 9   12 2798          heroin.use   0.1
## 10  12 2798    heroin.frequency  35.5


Separating key into subs and variables before spreading variables into 2 columns:

drug_gathered2 <- drug_gathered %>% 
  separate(key, into = c("subs", "variables"), sep = "\\." )
head(drug_gathered2,10)
##    age    n      subs variables value
## 1   12 2798   alcohol       use   3.9
## 2   12 2798   alcohol frequency     3
## 3   12 2798 marijuana       use   1.1
## 4   12 2798 marijuana frequency     4
## 5   12 2798   cocaine       use   0.1
## 6   12 2798   cocaine frequency   5.0
## 7   12 2798     crack       use     0
## 8   12 2798     crack frequency     -
## 9   12 2798    heroin       use   0.1
## 10  12 2798    heroin frequency  35.5


Spreading variables into use and frequency columns:

drug_gathered3 <- drug_gathered2 %>% 
  spread(variables, value)
  

knitr::kable(head(drug_gathered3, 15))
age n subs frequency use
12 2798 alcohol 3 3.9
12 2798 cocaine 5.0 0.1
12 2798 crack - 0
12 2798 hallucinogen 52 0.2
12 2798 heroin 35.5 0.1
12 2798 inhalant 19.0 1.6
12 2798 marijuana 4 1.1
12 2798 meth - 0
12 2798 oxycontin 24.5 0.1
12 2798 pain releiver 36 2
12 2798 sedative 13 0.2
12 2798 stimulant 2 0.2
12 2798 tranquilizer 52 0.2
13 2757 alcohol 6 8.5
13 2757 cocaine 1.0 0.1


Table alcohol use:

drug_gathered3 <- suppressWarnings(drug_gathered3 %>% 
  mutate(use = as.numeric(use),
         frequency = as.numeric(frequency)))

alcohol <- drug_gathered3 %>% 
  filter(subs=="alcohol") %>% 
  select(-subs)

head(alcohol)
##   age    n frequency  use
## 1  12 2798         3  3.9
## 2  13 2757         6  8.5
## 3  14 2792         5 18.1
## 4  15 2956         6 29.2
## 5  16 3058        10 40.1
## 6  17 3038        13 49.3


Association between age and alcohol usage - frequency and age:

ggplot(alcohol) + geom_point(aes(age, y = use), color = "blue") + labs(x = "Age", y = "Median Use")

ggplot(alcohol) + geom_point(aes(age, y = frequency), color = "red") + labs(x = "Age", y = "Freuency")


Table for 16 year old drug users:

age_16 <- drug_gathered3 %>% 
  filter(age == 16 & subs != "alcohol") %>% 
  select(-age) 
head(age_16)
##      n         subs frequency  use
## 1 3058      cocaine       7.0  1.0
## 2 3058        crack       1.0  0.0
## 3 3058 hallucinogen       3.0  3.4
## 4 3058       heroin      66.5  0.1
## 5 3058     inhalant       3.0  3.0
## 6 3058    marijuana      30.0 22.5


Bar plot for median drug use:

ggplot(age_16, aes(reorder(subs,-use),use), ylim = c(0,50) ) + 
  geom_bar(stat = "identity", color = "orange", fill = "white") +
  labs(x = "Median Drug Use", y = "Drugs") +
  coord_fixed(ratio = .2) +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))