Raw Drug-Use Data

Background on the Data

I am tidying a FiveThirtyEight dataset on drug use, which is hosted on GitHub. I am transforming the drug columns (3 to 28) into rows in order to analyze the different drugs used by different age groups. It is important to note that any column header in the dataset containing the word ‘use’ refers to the percentage of those in an age group who have used the specific drug in the past 12 months. Any column header containing the word ‘frequency’ refers to the median number of times a user in an age group used the specific drug in the past 12 months. For this analysis, I am only interested in the percentage of each age group that uses each drug.

Loading Data

library(RCurl)
## Loading required package: bitops
# Fetch the FiveThirtyEight drug-use-by-age CSV as text and parse it into a
# data frame, keeping text columns as character vectors rather than factors.
raw_drug <- read.csv(
  text = getURL("https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv"),
  header = TRUE,
  stringsAsFactors = FALSE
)
# Preview the first rows of the raw data.
head(raw_drug)
##   age    n alcohol.use alcohol.frequency marijuana.use marijuana.frequency
## 1  12 2798         3.9                 3           1.1                   4
## 2  13 2757         8.5                 6           3.4                  15
## 3  14 2792        18.1                 5           8.7                  24
## 4  15 2956        29.2                 6          14.5                  25
## 5  16 3058        40.1                10          22.5                  30
## 6  17 3038        49.3                13          28.0                  36
##   cocaine.use cocaine.frequency crack.use crack.frequency heroin.use
## 1         0.1               5.0       0.0               -        0.1
## 2         0.1               1.0       0.0             3.0        0.0
## 3         0.1               5.5       0.0               -        0.1
## 4         0.5               4.0       0.1             9.5        0.2
## 5         1.0               7.0       0.0             1.0        0.1
## 6         2.0               5.0       0.1            21.0        0.1
##   heroin.frequency hallucinogen.use hallucinogen.frequency inhalant.use
## 1             35.5              0.2                     52          1.6
## 2                -              0.6                      6          2.5
## 3              2.0              1.6                      3          2.6
## 4              1.0              2.1                      4          2.5
## 5             66.5              3.4                      3          3.0
## 6             64.0              4.8                      3          2.0
##   inhalant.frequency pain.releiver.use pain.releiver.frequency
## 1               19.0               2.0                      36
## 2               12.0               2.4                      14
## 3                5.0               3.9                      12
## 4                5.5               5.5                      10
## 5                3.0               6.2                       7
## 6                4.0               8.5                       9
##   oxycontin.use oxycontin.frequency tranquilizer.use
## 1           0.1                24.5              0.2
## 2           0.1                41.0              0.3
## 3           0.4                 4.5              0.9
## 4           0.8                 3.0              2.0
## 5           1.1                 4.0              2.4
## 6           1.4                 6.0              3.5
##   tranquilizer.frequency stimulant.use stimulant.frequency meth.use
## 1                   52.0           0.2                 2.0      0.0
## 2                   25.5           0.3                 4.0      0.1
## 3                    5.0           0.8                12.0      0.1
## 4                    4.5           1.5                 6.0      0.3
## 5                   11.0           1.8                 9.5      0.3
## 6                    7.0           2.8                 9.0      0.6
##   meth.frequency sedative.use sedative.frequency
## 1              -          0.2               13.0
## 2            5.0          0.1               19.0
## 3           24.0          0.2               16.5
## 4           10.5          0.4               30.0
## 5           36.0          0.2                3.0
## 6           48.0          0.5                6.5

Tidy & Transform

I am using the packages tidyr and dplyr to change the raw data to its final form.

library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:RCurl':
## 
##     complete
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Reshape the 26 drug columns (3:28) into long form: one row per age group and
# original column name, with the cell value stored in `Percent`.
drug_gather <- gather(raw_drug, "Drug", "Percent", 3:28)

# Split each column name at its LAST dot only. With the default separator,
# multi-word names such as "pain.releiver.use" are split into "pain" and
# "releiver" (the trailing "use" is discarded), so those rows never match the
# `Usage == "use"` filter below and the drug silently disappears.
drug_separate <- separate(drug_gather, "Drug", c("Drug", "Usage"),
                          sep = "\\.(?=[^.]+$)")

# Keep only the "use" (percentage of the age group) rows; the "frequency"
# rows are not part of this analysis.
drug_filter <- filter(drug_separate, Usage == "use")

# Gathering numeric and character columns together yields a character column.
# Convert after filtering: in the data shown, the "-" placeholders appear only
# in the frequency columns, so they are gone before the conversion.
drug_filter$Percent <- as.numeric(drug_filter$Percent)

names(drug_filter) <- c("Age", "Size", "Drug", "Usage", "Percent")

head(drug_filter)
##   Age Size    Drug Usage Percent
## 1  12 2798 alcohol   use     3.9
## 2  13 2757 alcohol   use     8.5
## 3  14 2792 alcohol   use    18.1
## 4  15 2956 alcohol   use    29.2
## 5  16 3058 alcohol   use    40.1
## 6  17 3038 alcohol   use    49.3

Analysis

Teenagers

I am using the ggplot2 package to illustrate the differences in drug use between teenagers and young adults.

library(ggplot2)
# Grouped bar chart of drug use (percent) per drug for ages 13 through 19,
# one bar per age within each drug; drug labels are rotated to avoid overlap.
teen <- ggplot(
  drug_filter[drug_filter$Age %in% c("13", "14", "15", "16", "17", "18", "19"), ],
  aes(x = Drug, y = Percent, fill = Age)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

teen

Young Adults

# Grouped bar chart of drug use (percent) per drug for the 18 to 24-25 age
# groups, one bar per age group within each drug.
young_adults <- ggplot(
  drug_filter[drug_filter$Age %in% c("18", "19", "20", "21", "22-23", "24-25"), ],
  aes(x = Drug, y = Percent, fill = Age)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

young_adults

Task 2 by Don P.

Introduction

I will be working on the original dataset for my part of the extension. The data set, Drug Use by Age, is part of the article "How Baby Boomers Get High". It covers 13 drugs across 17 age groups. Source of the data: the National Survey on Drug Use and Health, from the Substance Abuse and Mental Health Data Archive.

# Print the entire raw data frame (every age group and column) for reference.
raw_drug
##      age    n alcohol.use alcohol.frequency marijuana.use
## 1     12 2798         3.9                 3           1.1
## 2     13 2757         8.5                 6           3.4
## 3     14 2792        18.1                 5           8.7
## 4     15 2956        29.2                 6          14.5
## 5     16 3058        40.1                10          22.5
## 6     17 3038        49.3                13          28.0
## 7     18 2469        58.7                24          33.7
## 8     19 2223        64.6                36          33.4
## 9     20 2271        69.7                48          34.0
## 10    21 2354        83.2                52          33.0
## 11 22-23 4707        84.2                52          28.4
## 12 24-25 4591        83.1                52          24.9
## 13 26-29 2628        80.7                52          20.8
## 14 30-34 2864        77.5                52          16.4
## 15 35-49 7391        75.0                52          10.4
## 16 50-64 3923        67.2                52           7.3
## 17   65+ 2448        49.3                52           1.2
##    marijuana.frequency cocaine.use cocaine.frequency crack.use
## 1                    4         0.1               5.0       0.0
## 2                   15         0.1               1.0       0.0
## 3                   24         0.1               5.5       0.0
## 4                   25         0.5               4.0       0.1
## 5                   30         1.0               7.0       0.0
## 6                   36         2.0               5.0       0.1
## 7                   52         3.2               5.0       0.4
## 8                   60         4.1               5.5       0.5
## 9                   60         4.9               8.0       0.6
## 10                  52         4.8               5.0       0.5
## 11                  52         4.5               5.0       0.5
## 12                  60         4.0               6.0       0.5
## 13                  52         3.2               5.0       0.4
## 14                  72         2.1               8.0       0.5
## 15                  48         1.5              15.0       0.5
## 16                  52         0.9              36.0       0.4
## 17                  36         0.0                 -       0.0
##    crack.frequency heroin.use heroin.frequency hallucinogen.use
## 1                -        0.1             35.5              0.2
## 2              3.0        0.0                -              0.6
## 3                -        0.1              2.0              1.6
## 4              9.5        0.2              1.0              2.1
## 5              1.0        0.1             66.5              3.4
## 6             21.0        0.1             64.0              4.8
## 7             10.0        0.4             46.0              7.0
## 8              2.0        0.5            180.0              8.6
## 9              5.0        0.9             45.0              7.4
## 10            17.0        0.6             30.0              6.3
## 11             5.0        1.1             57.5              5.2
## 12             6.0        0.7             88.0              4.5
## 13             6.0        0.6             50.0              3.2
## 14            15.0        0.4             66.0              1.8
## 15            48.0        0.1            280.0              0.6
## 16            62.0        0.1             41.0              0.3
## 17               -        0.0            120.0              0.1
##    hallucinogen.frequency inhalant.use inhalant.frequency
## 1                      52          1.6               19.0
## 2                       6          2.5               12.0
## 3                       3          2.6                5.0
## 4                       4          2.5                5.5
## 5                       3          3.0                3.0
## 6                       3          2.0                4.0
## 7                       4          1.8                4.0
## 8                       3          1.4                3.0
## 9                       2          1.5                4.0
## 10                      4          1.4                2.0
## 11                      3          1.0                4.0
## 12                      2          0.8                2.0
## 13                      3          0.6                4.0
## 14                      2          0.4                3.5
## 15                      3          0.3               10.0
## 16                     44          0.2               13.5
## 17                      2          0.0                  -
##    pain.releiver.use pain.releiver.frequency oxycontin.use
## 1                2.0                      36           0.1
## 2                2.4                      14           0.1
## 3                3.9                      12           0.4
## 4                5.5                      10           0.8
## 5                6.2                       7           1.1
## 6                8.5                       9           1.4
## 7                9.2                      12           1.7
## 8                9.4                      12           1.5
## 9               10.0                      10           1.7
## 10               9.0                      15           1.3
## 11              10.0                      15           1.7
## 12               9.0                      15           1.3
## 13               8.3                      13           1.2
## 14               5.9                      22           0.9
## 15               4.2                      12           0.3
## 16               2.5                      12           0.4
## 17               0.6                      24           0.0
##    oxycontin.frequency tranquilizer.use tranquilizer.frequency
## 1                 24.5              0.2                   52.0
## 2                 41.0              0.3                   25.5
## 3                  4.5              0.9                    5.0
## 4                  3.0              2.0                    4.5
## 5                  4.0              2.4                   11.0
## 6                  6.0              3.5                    7.0
## 7                  7.0              4.9                   12.0
## 8                  7.5              4.2                    4.5
## 9                 12.0              5.4                   10.0
## 10                13.5              3.9                    7.0
## 11                17.5              4.4                   12.0
## 12                20.0              4.3                   10.0
## 13                13.5              4.2                   10.0
## 14                46.0              3.6                    8.0
## 15                12.0              1.9                    6.0
## 16                 5.0              1.4                   10.0
## 17                   -              0.2                    5.0
##    stimulant.use stimulant.frequency meth.use meth.frequency sedative.use
## 1            0.2                 2.0      0.0              -          0.2
## 2            0.3                 4.0      0.1            5.0          0.1
## 3            0.8                12.0      0.1           24.0          0.2
## 4            1.5                 6.0      0.3           10.5          0.4
## 5            1.8                 9.5      0.3           36.0          0.2
## 6            2.8                 9.0      0.6           48.0          0.5
## 7            3.0                 8.0      0.5           12.0          0.4
## 8            3.3                 6.0      0.4          105.0          0.3
## 9            4.0                12.0      0.9           12.0          0.5
## 10           4.1                10.0      0.6            2.0          0.3
## 11           3.6                10.0      0.6           46.0          0.2
## 12           2.6                10.0      0.7           21.0          0.2
## 13           2.3                 7.0      0.6           30.0          0.4
## 14           1.4                12.0      0.4           54.0          0.4
## 15           0.6                24.0      0.2          104.0          0.3
## 16           0.3                24.0      0.2           30.0          0.2
## 17           0.0               364.0      0.0              -          0.0
##    sedative.frequency
## 1                13.0
## 2                19.0
## 3                16.5
## 4                30.0
## 5                 3.0
## 6                 6.5
## 7                10.0
## 8                 6.0
## 9                 4.0
## 10                9.0
## 11               52.0
## 12               17.5
## 13                4.0
## 14               10.0
## 15               10.0
## 16              104.0
## 17               15.0

Tidying the data frame

I decided to rename some columns using the rename() function.

# Give the sample-size column a descriptive name and drop the ".use" suffix
# from every usage column, so each is named after its drug alone.
drug_usage <- rename(
  raw_drug,
  sample.size   = n,
  alcohol       = alcohol.use,
  marijuana     = marijuana.use,
  cocaine       = cocaine.use,
  crack         = crack.use,
  heroin        = heroin.use,
  hallucinogen  = hallucinogen.use,
  inhalant      = inhalant.use,
  pain.releiver = pain.releiver.use,
  oxycontin     = oxycontin.use,
  tranquilizer  = tranquilizer.use,
  stimulant     = stimulant.use,
  meth          = meth.use,
  sedative      = sedative.use
)
head(drug_usage)
##   age sample.size alcohol alcohol.frequency marijuana marijuana.frequency
## 1  12        2798     3.9                 3       1.1                   4
## 2  13        2757     8.5                 6       3.4                  15
## 3  14        2792    18.1                 5       8.7                  24
## 4  15        2956    29.2                 6      14.5                  25
## 5  16        3058    40.1                10      22.5                  30
## 6  17        3038    49.3                13      28.0                  36
##   cocaine cocaine.frequency crack crack.frequency heroin heroin.frequency
## 1     0.1               5.0   0.0               -    0.1             35.5
## 2     0.1               1.0   0.0             3.0    0.0                -
## 3     0.1               5.5   0.0               -    0.1              2.0
## 4     0.5               4.0   0.1             9.5    0.2              1.0
## 5     1.0               7.0   0.0             1.0    0.1             66.5
## 6     2.0               5.0   0.1            21.0    0.1             64.0
##   hallucinogen hallucinogen.frequency inhalant inhalant.frequency
## 1          0.2                     52      1.6               19.0
## 2          0.6                      6      2.5               12.0
## 3          1.6                      3      2.6                5.0
## 4          2.1                      4      2.5                5.5
## 5          3.4                      3      3.0                3.0
## 6          4.8                      3      2.0                4.0
##   pain.releiver pain.releiver.frequency oxycontin oxycontin.frequency
## 1           2.0                      36       0.1                24.5
## 2           2.4                      14       0.1                41.0
## 3           3.9                      12       0.4                 4.5
## 4           5.5                      10       0.8                 3.0
## 5           6.2                       7       1.1                 4.0
## 6           8.5                       9       1.4                 6.0
##   tranquilizer tranquilizer.frequency stimulant stimulant.frequency meth
## 1          0.2                   52.0       0.2                 2.0  0.0
## 2          0.3                   25.5       0.3                 4.0  0.1
## 3          0.9                    5.0       0.8                12.0  0.1
## 4          2.0                    4.5       1.5                 6.0  0.3
## 5          2.4                   11.0       1.8                 9.5  0.3
## 6          3.5                    7.0       2.8                 9.0  0.6
##   meth.frequency sedative sedative.frequency
## 1              -      0.2               13.0
## 2            5.0      0.1               19.0
## 3           24.0      0.2               16.5
## 4           10.5      0.4               30.0
## 5           36.0      0.2                3.0
## 6           48.0      0.5                6.5

As my second step, I removed the drug frequency columns using the select() function, since I only wanted to keep the drug usage columns for further analysis.

library(dplyr)
# Drop every "<drug>.frequency" column, keeping age, sample size and the
# per-drug usage percentages. Selecting by name suffix rather than by
# hard-coded positions keeps this correct if the column order ever changes.
drug_usage <- drug_usage %>%
  select(-ends_with(".frequency"))
head(drug_usage)
##   age sample.size alcohol marijuana cocaine crack heroin hallucinogen
## 1  12        2798     3.9       1.1     0.1   0.0    0.1          0.2
## 2  13        2757     8.5       3.4     0.1   0.0    0.0          0.6
## 3  14        2792    18.1       8.7     0.1   0.0    0.1          1.6
## 4  15        2956    29.2      14.5     0.5   0.1    0.2          2.1
## 5  16        3058    40.1      22.5     1.0   0.0    0.1          3.4
## 6  17        3038    49.3      28.0     2.0   0.1    0.1          4.8
##   inhalant pain.releiver oxycontin tranquilizer stimulant meth sedative
## 1      1.6           2.0       0.1          0.2       0.2  0.0      0.2
## 2      2.5           2.4       0.1          0.3       0.3  0.1      0.1
## 3      2.6           3.9       0.4          0.9       0.8  0.1      0.2
## 4      2.5           5.5       0.8          2.0       1.5  0.3      0.4
## 5      3.0           6.2       1.1          2.4       1.8  0.3      0.2
## 6      2.0           8.5       1.4          3.5       2.8  0.6      0.5

For my next step, I used the gather() function to turn my wide data into key-value pairs of Drug and Percent.

# Collapse the 13 drug columns (positions 3 to 15) into two columns: `Drug`
# holds the former column name and `Percent` holds its value.
tidy_drug_usage <- gather(drug_usage, key = "Drug", value = "Percent", 3:15)

head(tidy_drug_usage)
##   age sample.size    Drug Percent
## 1  12        2798 alcohol     3.9
## 2  13        2757 alcohol     8.5
## 3  14        2792 alcohol    18.1
## 4  15        2956 alcohol    29.2
## 5  16        3058 alcohol    40.1
## 6  17        3038 alcohol    49.3

Visualization

To make the data that I just tidied easier to visualize, I am splitting the age variable into three categories.
1. Teens: 13-19
2. Young Adults: 20-25
3. Adults: 26-65+

# Teens (ages 13-19): horizontal grouped bars of the percentage of each age
# group using each drug. Plots tidy_drug_usage (the table tidied in this
# section) rather than the earlier drug_filter table, so every drug column of
# the raw data, including pain relievers, is shown.
Teens <- ggplot(
  subset(tidy_drug_usage, age %in% c("13", "14", "15", "16", "17", "18", "19")),
  aes(x = Drug, y = Percent, fill = age)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(fill = "Age") +  # keep the legend title capitalised
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  coord_flip()

Teens

library(ggplot2)
# Young adults (ages 20 to 24-25): horizontal grouped bars of the percentage
# of each age group using each drug. Plots tidy_drug_usage (the table tidied
# in this section) rather than the earlier drug_filter table, so every drug
# column of the raw data, including pain relievers, is shown.
young_adults <- ggplot(
  subset(tidy_drug_usage, age %in% c("20", "21", "22-23", "24-25")),
  aes(x = Drug, y = Percent, fill = age)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(fill = "Age") +  # keep the legend title capitalised
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  coord_flip()

young_adults

# Adults (ages 26-29 through 65+): horizontal grouped bars of the percentage
# of each age group using each drug. Plots tidy_drug_usage (the table tidied
# in this section) rather than the earlier drug_filter table, so every drug
# column of the raw data, including pain relievers, is shown.
Adults <- ggplot(
  subset(tidy_drug_usage, age %in% c("26-29", "30-34", "35-49", "50-64", "65+")),
  aes(x = Drug, y = Percent, fill = age)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(fill = "Age") +  # keep the legend title capitalised
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  coord_flip()

Adults

Conclusion: Just by looking at these plots, we can conclude that alcohol is the most widely used drug in all three age categories, and marijuana is the second most widely used in all three.