# Location of the FiveThirtyEight "drug use by age" table (one row per age
# group, one `<drug>.use` and one `<drug>.frequency` column per drug).
drug_use_data <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv"

# Read text columns as plain character vectors rather than factors. Some of the
# `.frequency` columns contain "-" in place of a number, so those columns are
# read as text and must be converted to numeric later.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
du <- read.csv(drug_use_data, stringsAsFactors = FALSE)

# Preview the first rows of the raw (wide) table.
head(du)
## age n alcohol.use alcohol.frequency marijuana.use marijuana.frequency
## 1 12 2798 3.9 3 1.1 4
## 2 13 2757 8.5 6 3.4 15
## 3 14 2792 18.1 5 8.7 24
## 4 15 2956 29.2 6 14.5 25
## 5 16 3058 40.1 10 22.5 30
## 6 17 3038 49.3 13 28.0 36
## cocaine.use cocaine.frequency crack.use crack.frequency heroin.use
## 1 0.1 5.0 0.0 - 0.1
## 2 0.1 1.0 0.0 3.0 0.0
## 3 0.1 5.5 0.0 - 0.1
## 4 0.5 4.0 0.1 9.5 0.2
## 5 1.0 7.0 0.0 1.0 0.1
## 6 2.0 5.0 0.1 21.0 0.1
## heroin.frequency hallucinogen.use hallucinogen.frequency inhalant.use
## 1 35.5 0.2 52 1.6
## 2 - 0.6 6 2.5
## 3 2.0 1.6 3 2.6
## 4 1.0 2.1 4 2.5
## 5 66.5 3.4 3 3.0
## 6 64.0 4.8 3 2.0
## inhalant.frequency pain.releiver.use pain.releiver.frequency
## 1 19.0 2.0 36
## 2 12.0 2.4 14
## 3 5.0 3.9 12
## 4 5.5 5.5 10
## 5 3.0 6.2 7
## 6 4.0 8.5 9
## oxycontin.use oxycontin.frequency tranquilizer.use
## 1 0.1 24.5 0.2
## 2 0.1 41.0 0.3
## 3 0.4 4.5 0.9
## 4 0.8 3.0 2.0
## 5 1.1 4.0 2.4
## 6 1.4 6.0 3.5
## tranquilizer.frequency stimulant.use stimulant.frequency meth.use
## 1 52.0 0.2 2.0 0.0
## 2 25.5 0.3 4.0 0.1
## 3 5.0 0.8 12.0 0.1
## 4 4.5 1.5 6.0 0.3
## 5 11.0 1.8 9.5 0.3
## 6 7.0 2.8 9.0 0.6
## meth.frequency sedative.use sedative.frequency
## 1 - 0.2 13.0
## 2 5.0 0.1 19.0
## 3 24.0 0.2 16.5
## 4 10.5 0.4 30.0
## 5 36.0 0.2 3.0
## 6 48.0 0.5 6.5
Looking at the column names, we can see that ‘age’ and ‘n’ (the number of respondents in the age group) are columns that can stay as they are. The remaining columns come in pairs, one pair per drug: drug.use (the percentage of the age group that used the drug) and drug.frequency (the median number of times a user in the age group used the drug in the past 12 months).
I would like to change this data set to a ‘long’ format by keeping the columns ‘age’ and ‘n’, and then creating the columns ‘drug’, ‘percent_used’, and ‘frequency’.
Display Column Names
# List every column name of the wide table; for a data frame the names of the
# object are its column names.
names(du)
## [1] "age" "n"
## [3] "alcohol.use" "alcohol.frequency"
## [5] "marijuana.use" "marijuana.frequency"
## [7] "cocaine.use" "cocaine.frequency"
## [9] "crack.use" "crack.frequency"
## [11] "heroin.use" "heroin.frequency"
## [13] "hallucinogen.use" "hallucinogen.frequency"
## [15] "inhalant.use" "inhalant.frequency"
## [17] "pain.releiver.use" "pain.releiver.frequency"
## [19] "oxycontin.use" "oxycontin.frequency"
## [21] "tranquilizer.use" "tranquilizer.frequency"
## [23] "stimulant.use" "stimulant.frequency"
## [25] "meth.use" "meth.frequency"
## [27] "sedative.use" "sedative.frequency"
Create one subset for the drug.use columns and one for the drug.frequency columns
# Build the long-format "percentage used" table in two named steps.
# Step 1: keep the identifier columns plus every column whose name ends in "use".
use_wide <- select(du, age, n, ends_with("use"))
# Step 2: stack the "*use" columns; the former column name goes into `drug`
# and the cell value into `percent_used`.
use <- gather(use_wide, key = "drug", value = "percent_used", ends_with("use"))
head(use)
## age n drug percent_used
## 1 12 2798 alcohol.use 3.9
## 2 13 2757 alcohol.use 8.5
## 3 14 2792 alcohol.use 18.1
## 4 15 2956 alcohol.use 29.2
## 5 16 3058 alcohol.use 40.1
## 6 17 3038 alcohol.use 49.3
# Build the long-format "frequency" table: keep the identifier columns plus
# every `*frequency` column, then stack those columns so the former column name
# goes into `drug` and the cell value into `frequency`.
# The same suffix is used in both steps so the selection and the reshape can
# never disagree about which columns are frequency columns.
freq <- du %>%
  select(c(age, n, ends_with("frequency"))) %>% # 'age', 'n', and columns ending with 'frequency'
  gather("drug", "frequency", ends_with("frequency")) # create 'drug' and 'frequency' columns
head(freq)
## age n drug frequency
## 1 12 2798 alcohol.frequency 3
## 2 13 2757 alcohol.frequency 6
## 3 14 2792 alcohol.frequency 5
## 4 15 2956 alcohol.frequency 6
## 5 16 3058 alcohol.frequency 10
## 6 17 3038 alcohol.frequency 13
Strip the ‘.use’ and ‘.frequency’ from the end of the drug value.
# Remove the trailing ".use" so `drug` holds only the drug name.
# The dot is escaped: an unescaped "." matches any character, which would also
# strip endings such as "xuse". The pattern is anchored with "$", so at most one
# match per value is possible and `sub()` is sufficient.
use$drug <- sub("\\.use$", "", use$drug)
head(use)
## age n drug percent_used
## 1 12 2798 alcohol 3.9
## 2 13 2757 alcohol 8.5
## 3 14 2792 alcohol 18.1
## 4 15 2956 alcohol 29.2
## 5 16 3058 alcohol 40.1
## 6 17 3038 alcohol 49.3
# Remove the trailing ".frequency" so `drug` holds only the drug name.
# The dot is escaped so that only a literal "." before "frequency" is removed;
# the "$" anchor means at most one match per value, so `sub()` is sufficient.
freq$drug <- sub("\\.frequency$", "", freq$drug)
head(freq)
## age n drug frequency
## 1 12 2798 alcohol 3
## 2 13 2757 alcohol 6
## 3 14 2792 alcohol 5
## 4 15 2956 alcohol 6
## 5 16 3058 alcohol 10
## 6 17 3038 alcohol 13
Merge the ‘use’ and ‘freq’ data frames.
# Combine the two long tables into one: `use` plus a numeric `frequency` column.

# The frequency values arrive as text because the source uses "-" where no
# value is available. Map that placeholder to NA explicitly so the numeric
# conversion below does not depend on (and emit) a coercion warning.
freq_values <- trimws(as.character(freq$frequency))
freq_values[freq_values == "-"] <- NA_character_

# cbind() pairs rows purely by position, so make sure both tables list the same
# age group / drug combination on every row before gluing them together.
stopifnot(
  nrow(use) == nrow(freq),
  identical(as.character(use$age), as.character(freq$age)),
  identical(as.character(use$drug), as.character(freq$drug))
)

drug_use <- cbind(use, frequency = as.double(freq_values))
head(drug_use)
## age n drug percent_used frequency
## 1 12 2798 alcohol 3.9 3
## 2 13 2757 alcohol 8.5 6
## 3 14 2792 alcohol 18.1 5
## 4 15 2956 alcohol 29.2 6
## 5 16 3058 alcohol 40.1 10
## 6 17 3038 alcohol 49.3 13
# One data frame per drug, each holding that drug's rows of `drug_use`.
al <- filter(drug_use, drug == "alcohol")
mar <- filter(drug_use, drug == "marijuana")
co <- filter(drug_use, drug == "cocaine")
cr <- filter(drug_use, drug == "crack")
her <- filter(drug_use, drug == "heroin")
hal <- filter(drug_use, drug == "hallucinogen")
inh <- filter(drug_use, drug == "inhalant")
# "pain.releiver" is spelled as it appears in the source column names.
pr <- filter(drug_use, drug == "pain.releiver")
ox <- filter(drug_use, drug == "oxycontin")
tr <- filter(drug_use, drug == "tranquilizer")
st <- filter(drug_use, drug == "stimulant")
meth <- filter(drug_use, drug == "meth")
sed <- filter(drug_use, drug == "sedative")

# Largest values across all drugs, ignoring missing entries; used as the shared
# upper y-axis limit so every plot of the same measure has the same scale.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
maxf <- max(drug_use$frequency, na.rm = TRUE)
maxp <- max(drug_use$percent_used, na.rm = TRUE)
# Draw one labelled bar chart of a single measure across age groups.
#
# data      : rows of `drug_use` for one drug (needs an `age` column and the
#             column named by `value_col`).
# value_col : name of the column to plot, "percent_used" or "frequency".
# title     : plot title (the drug's display name).
# y_label   : y-axis label.
# y_max     : upper y-axis limit, shared by all plots of the same measure.
#
# Returns the ggplot object; the caller is responsible for printing it.
plot_by_age <- function(data, value_col, title, y_label, y_max) {
  # Copy the requested measure into a fixed column so aes() can refer to it
  # by a constant name regardless of which measure is plotted.
  data$value <- data[[value_col]]

  ggplot(data = data, aes(x = age, y = value, fill = age)) +
    xlab("age group") +
    ylab(y_label) +
    ggtitle(title) +
    ylim(0, y_max) +
    geom_bar(colour = "black", stat = "identity") +
    # Print each bar's value just above the bar.
    geom_text(
      aes(label = value),
      position = position_dodge(width = 0.9),
      vjust = -0.25
    ) +
    # The fill colour only repeats the x-axis, so the legend is hidden.
    guides(fill = "none")
}

# Value stored in `drug_use$drug` -> name shown on the plots, in plotting order.
drug_titles <- c(
  alcohol = "Alcohol",
  marijuana = "Marijuana",
  cocaine = "Cocaine",
  crack = "Crack",
  heroin = "Heroin",
  hallucinogen = "Hallucinogen",
  inhalant = "Inhalant",
  pain.releiver = "Pain Reliever",
  oxycontin = "Oxycontin",
  tranquilizer = "Tranquilizer",
  stimulant = "Stimulant",
  meth = "Meth",
  sedative = "Sedative"
)

# For every drug: first the percentage-of-use chart, then the frequency chart.
# Percentage charts share the `maxp` axis limit and every frequency chart uses
# `maxf`, so bars are comparable across drugs and no frequency bar is cut off
# by the (smaller) percentage limit.
# Rows whose frequency is NA are dropped by ggplot2 with a "Removed N rows
# containing missing values" warning.
for (drug_key in names(drug_titles)) {
  drug_rows <- drug_use[drug_use$drug == drug_key, , drop = FALSE]
  drug_title <- drug_titles[[drug_key]]

  # Plots are not auto-printed inside a loop, so print them explicitly.
  print(plot_by_age(
    drug_rows, "percent_used", drug_title,
    paste("Percentage of", drug_title, "Use"), maxp
  ))
  print(plot_by_age(
    drug_rows, "frequency", drug_title,
    "Times Used in Past Year", maxf
  ))
}