# Loading libraries ----
library(tidyverse)
library(stringr)
library(ggthemes)
library(knitr)
# knitr chunk defaults: tidy (reformat) echoed chunk code, using a width
# cutoff of 60 for the tidied output.
opts_chunk$set(
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 60)
)
# Tidying data ----
# Download the FiveThirtyEight drug-use-by-age table directly from GitHub.
# Text columns are kept as character vectors instead of becoming factors.
drug_use <- read.csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv",
  stringsAsFactors = FALSE
)
# Collapse each drug's `.use` / `.frequency` column pair into one column named
# after the drug. unite() joins the two values with its default "_" separator
# and drops the two source columns. The source columns are spelled
# `pain.releiver`; the combined column is given the name "pain reliever".
drug_use <- drug_use %>%
  unite(col = alcohol, alcohol.use, alcohol.frequency) %>%
  unite(col = marijuana, marijuana.use, marijuana.frequency) %>%
  unite(col = cocaine, cocaine.use, cocaine.frequency) %>%
  unite(col = crack, crack.use, crack.frequency) %>%
  unite(col = heroin, heroin.use, heroin.frequency) %>%
  unite(col = hallucinogen, hallucinogen.use, hallucinogen.frequency) %>%
  unite(col = inhalant, inhalant.use, inhalant.frequency) %>%
  unite(col = "pain reliever", pain.releiver.use, pain.releiver.frequency) %>%
  unite(col = oxycontin, oxycontin.use, oxycontin.frequency) %>%
  unite(col = tranquilizer, tranquilizer.use, tranquilizer.frequency) %>%
  unite(col = stimulant, stimulant.use, stimulant.frequency) %>%
  unite(col = meth, meth.use, meth.frequency) %>%
  unite(col = sedative, sedative.use, sedative.frequency)
# Reshape to long form: one row per age group and drug.
#
# * pivot_longer() replaces the superseded gather().
# * The drug columns are selected by excluding the identifier columns `age`
#   and `n` by name rather than by position (3:15), so the step does not
#   silently pick the wrong columns if the table gains or reorders columns.
# * Rows come out grouped by age group instead of by drug as with gather();
#   the columns, their types and the factor levels of `drug` are unchanged.
drug_use <- drug_use %>%
  pivot_longer(
    cols = -c(age, n),
    names_to = "drug",
    values_to = "use_freq"
  ) %>%
  # Split the "<use>_<frequency>" strings back into two columns.
  separate(use_freq, into = c("use", "frequency"), sep = "_") %>%
  # `frequency` is intentionally left as character, exactly as before.
  # NOTE(review): convert it too if numeric frequencies are needed later.
  mutate(
    use = as.numeric(use),
    drug = as.factor(drug)
  )
# Plotting data ----
# Scatter plot of `use` (x axis) against `age` (y axis), one colour per drug,
# styled with theme_wsj().
ggplot(drug_use) +
  geom_point(aes(x = use, y = age, color = drug)) +
  theme_wsj()