Load libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

Import data file as CSV

# Read the FiveThirtyEight "drug-use-by-age" CSV directly from GitHub.
# The first line of the file supplies the column names.
my_data <- suppressWarnings(
  read.csv(
    file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv",
    header = TRUE
  )
)

Display an overview of the data frame

# Print a compact per-column summary (name, type, leading values) of the
# imported data.
my_data %>%
  glimpse()
## Rows: 17
## Columns: 28
## $ age                     <chr> "12", "13", "14", "15", "16", "17", "18", "19"…
## $ n                       <int> 2798, 2757, 2792, 2956, 3058, 3038, 2469, 2223…
## $ alcohol_use             <dbl> 3.9, 8.5, 18.1, 29.2, 40.1, 49.3, 58.7, 64.6, …
## $ alcohol_frequency       <dbl> 3, 6, 5, 6, 10, 13, 24, 36, 48, 52, 52, 52, 52…
## $ marijuana_use           <dbl> 1.1, 3.4, 8.7, 14.5, 22.5, 28.0, 33.7, 33.4, 3…
## $ marijuana_frequency     <dbl> 4, 15, 24, 25, 30, 36, 52, 60, 60, 52, 52, 60,…
## $ cocaine_use             <dbl> 0.1, 0.1, 0.1, 0.5, 1.0, 2.0, 3.2, 4.1, 4.9, 4…
## $ cocaine_frequency       <chr> "5.0", "1.0", "5.5", "4.0", "7.0", "5.0", "5.0…
## $ crack_use               <dbl> 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.4, 0.5, 0.6, 0…
## $ crack_frequency         <chr> "-", "3.0", "-", "9.5", "1.0", "21.0", "10.0",…
## $ heroin_use              <dbl> 0.1, 0.0, 0.1, 0.2, 0.1, 0.1, 0.4, 0.5, 0.9, 0…
## $ heroin_frequency        <chr> "35.5", "-", "2.0", "1.0", "66.5", "64.0", "46…
## $ hallucinogen_use        <dbl> 0.2, 0.6, 1.6, 2.1, 3.4, 4.8, 7.0, 8.6, 7.4, 6…
## $ hallucinogen_frequency  <dbl> 52, 6, 3, 4, 3, 3, 4, 3, 2, 4, 3, 2, 3, 2, 3, …
## $ inhalant_use            <dbl> 1.6, 2.5, 2.6, 2.5, 3.0, 2.0, 1.8, 1.4, 1.5, 1…
## $ inhalant_frequency      <chr> "19.0", "12.0", "5.0", "5.5", "3.0", "4.0", "4…
## $ pain_releiver_use       <dbl> 2.0, 2.4, 3.9, 5.5, 6.2, 8.5, 9.2, 9.4, 10.0, …
## $ pain_releiver_frequency <dbl> 36, 14, 12, 10, 7, 9, 12, 12, 10, 15, 15, 15, …
## $ oxycontin_use           <dbl> 0.1, 0.1, 0.4, 0.8, 1.1, 1.4, 1.7, 1.5, 1.7, 1…
## $ oxycontin_frequency     <chr> "24.5", "41.0", "4.5", "3.0", "4.0", "6.0", "7…
## $ tranquilizer_use        <dbl> 0.2, 0.3, 0.9, 2.0, 2.4, 3.5, 4.9, 4.2, 5.4, 3…
## $ tranquilizer_frequency  <dbl> 52.0, 25.5, 5.0, 4.5, 11.0, 7.0, 12.0, 4.5, 10…
## $ stimulant_use           <dbl> 0.2, 0.3, 0.8, 1.5, 1.8, 2.8, 3.0, 3.3, 4.0, 4…
## $ stimulant_frequency     <dbl> 2.0, 4.0, 12.0, 6.0, 9.5, 9.0, 8.0, 6.0, 12.0,…
## $ meth_use                <dbl> 0.0, 0.1, 0.1, 0.3, 0.3, 0.6, 0.5, 0.4, 0.9, 0…
## $ meth_frequency          <chr> "-", "5.0", "24.0", "10.5", "36.0", "48.0", "1…
## $ sedative_use            <dbl> 0.2, 0.1, 0.2, 0.4, 0.2, 0.5, 0.4, 0.3, 0.5, 0…
## $ sedative_frequency      <dbl> 13.0, 19.0, 16.5, 30.0, 3.0, 6.5, 10.0, 6.0, 4…

Extract a subset of the data frame that keeps only the `age`, `n`, `alcohol_use`, and `marijuana_use` columns.

# Keep only the age, n, alcohol_use and marijuana_use columns, then print the
# reduced table.
my_data1 <- my_data %>%
  select(age, n, alcohol_use, marijuana_use)
my_data1
##      age    n alcohol_use marijuana_use
## 1     12 2798         3.9           1.1
## 2     13 2757         8.5           3.4
## 3     14 2792        18.1           8.7
## 4     15 2956        29.2          14.5
## 5     16 3058        40.1          22.5
## 6     17 3038        49.3          28.0
## 7     18 2469        58.7          33.7
## 8     19 2223        64.6          33.4
## 9     20 2271        69.7          34.0
## 10    21 2354        83.2          33.0
## 11 22-23 4707        84.2          28.4
## 12 24-25 4591        83.1          24.9
## 13 26-29 2628        80.7          20.8
## 14 30-34 2864        77.5          16.4
## 15 35-49 7391        75.0          10.4
## 16 50-64 3923        67.2           7.3
## 17   65+ 2448        49.3           1.2

Extract only the row for the 50-64 age group, i.e. the baby boomer generation.

# Select the row(s) whose age label is exactly "50-64".
boomers_50_64 <- filter(my_data1, age == "50-64")

Filter all rows for ages 21 and below and assign them to the `underAgeDrugUse` variable.

# Keep only the single-year age rows at or below 21, then print them.
#
# `age` is a character column because it also holds range labels such as
# "22-23" and "65+". Comparing it to 21 directly makes R compare strings
# lexicographically, which only gives the right rows by coincidence. Convert
# to numeric first: range labels become NA (that coercion warning is expected
# and silenced here), and filter() drops rows whose condition is NA.
underAgeDrugUse <- my_data1 %>%
  filter(suppressWarnings(as.numeric(age)) <= 21)
underAgeDrugUse
##    age    n alcohol_use marijuana_use
## 1   12 2798         3.9           1.1
## 2   13 2757         8.5           3.4
## 3   14 2792        18.1           8.7
## 4   15 2956        29.2          14.5
## 5   16 3058        40.1          22.5
## 6   17 3038        49.3          28.0
## 7   18 2469        58.7          33.7
## 8   19 2223        64.6          33.4
## 9   20 2271        69.7          34.0
## 10  21 2354        83.2          33.0

Display baby boomers row.

# Print the 50-64 age-group row held in boomers_50_64.
boomers_50_64
##     age    n alcohol_use marijuana_use
## 1 50-64 3923        67.2           7.3

Compare alcohol and marijuana use between the two age groups.

# Build a two-row summary comparing the "Below 21" ages with the 50-64 group.
#
# `age` is character (it mixes single years with labels like "22-23"), so it
# is converted to numeric before comparing against 21; a direct `age < 21`
# would be a lexicographic string comparison. Range labels become NA (the
# coercion warning is expected and silenced) and are excluded from the mask.
age_num <- suppressWarnings(as.numeric(my_data1$age))
is_under_21 <- !is.na(age_num) & age_num < 21
is_50_64 <- my_data1$age == "50-64"

# The "Below 21" figures are plain (unweighted) means of the per-age values;
# the sample size column `n` is not used. The 50-64 figures are that row's
# values as-is.
drug_comparison <- data.frame(
  age_group = c("Below 21", "50-64"),
  alcohol_use = c(
    mean(my_data1$alcohol_use[is_under_21], na.rm = TRUE),
    my_data1$alcohol_use[is_50_64]
  ),
  marijuana_use = c(
    mean(my_data1$marijuana_use[is_under_21], na.rm = TRUE),
    my_data1$marijuana_use[is_50_64]
  )
)
drug_comparison
##   age_group alcohol_use marijuana_use
## 1  Below 21    38.01111      19.92222
## 2     50-64    67.20000       7.30000

Bar Plot for alcohol use

# Bar chart of alcohol use: one bar per age group, bar height taken directly
# from the alcohol_use column, bars coloured by age group.
ggplot(
  data = drug_comparison,
  mapping = aes(x = age_group, y = alcohol_use, fill = age_group)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparison of Alcohol Use Between Age Groups",
    x = "Age Group",
    y = "Mean Alcohol Use"
  ) +
  theme_minimal()

Bar plot for marijuana use.

# Bar chart of marijuana use: one bar per age group, bar height taken directly
# from the marijuana_use column, bars coloured by age group.
ggplot(
  data = drug_comparison,
  mapping = aes(x = age_group, y = marijuana_use, fill = age_group)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Comparison of Marijuana Use Between Age Groups",
    x = "Age Group",
    y = "Mean Marijuana Use"
  ) +
  theme_minimal()

In summary, comparing alcohol and marijuana use between individuals under 21 and the baby boomer generation (the 50-64 age group) reveals that, on average, those under 21 report higher marijuana use, while baby boomers report higher alcohol use. These findings emphasize the need for age-specific insights when shaping interventions and policies. Given the limitations of mean values, further analysis may be required for a comprehensive understanding of drug use patterns across age groups. Additionally, exploring the relationship between belonging to the baby boomer generation and alcohol use may be a subject for future study.