library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Read the FiveThirtyEight "drug use by age" CSV straight from GitHub.
# Warnings raised while reading are suppressed.
my_data <- suppressWarnings(
  read.csv(
    file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/drug-use-by-age/drug-use-by-age.csv",
    header = TRUE
  )
)
Display an overview of the data frame.
# Column-wise preview of the loaded table: one line per column with its
# type and leading values.
dplyr::glimpse(my_data)
## Rows: 17
## Columns: 28
## $ age <chr> "12", "13", "14", "15", "16", "17", "18", "19"…
## $ n <int> 2798, 2757, 2792, 2956, 3058, 3038, 2469, 2223…
## $ alcohol_use <dbl> 3.9, 8.5, 18.1, 29.2, 40.1, 49.3, 58.7, 64.6, …
## $ alcohol_frequency <dbl> 3, 6, 5, 6, 10, 13, 24, 36, 48, 52, 52, 52, 52…
## $ marijuana_use <dbl> 1.1, 3.4, 8.7, 14.5, 22.5, 28.0, 33.7, 33.4, 3…
## $ marijuana_frequency <dbl> 4, 15, 24, 25, 30, 36, 52, 60, 60, 52, 52, 60,…
## $ cocaine_use <dbl> 0.1, 0.1, 0.1, 0.5, 1.0, 2.0, 3.2, 4.1, 4.9, 4…
## $ cocaine_frequency <chr> "5.0", "1.0", "5.5", "4.0", "7.0", "5.0", "5.0…
## $ crack_use <dbl> 0.0, 0.0, 0.0, 0.1, 0.0, 0.1, 0.4, 0.5, 0.6, 0…
## $ crack_frequency <chr> "-", "3.0", "-", "9.5", "1.0", "21.0", "10.0",…
## $ heroin_use <dbl> 0.1, 0.0, 0.1, 0.2, 0.1, 0.1, 0.4, 0.5, 0.9, 0…
## $ heroin_frequency <chr> "35.5", "-", "2.0", "1.0", "66.5", "64.0", "46…
## $ hallucinogen_use <dbl> 0.2, 0.6, 1.6, 2.1, 3.4, 4.8, 7.0, 8.6, 7.4, 6…
## $ hallucinogen_frequency <dbl> 52, 6, 3, 4, 3, 3, 4, 3, 2, 4, 3, 2, 3, 2, 3, …
## $ inhalant_use <dbl> 1.6, 2.5, 2.6, 2.5, 3.0, 2.0, 1.8, 1.4, 1.5, 1…
## $ inhalant_frequency <chr> "19.0", "12.0", "5.0", "5.5", "3.0", "4.0", "4…
## $ pain_releiver_use <dbl> 2.0, 2.4, 3.9, 5.5, 6.2, 8.5, 9.2, 9.4, 10.0, …
## $ pain_releiver_frequency <dbl> 36, 14, 12, 10, 7, 9, 12, 12, 10, 15, 15, 15, …
## $ oxycontin_use <dbl> 0.1, 0.1, 0.4, 0.8, 1.1, 1.4, 1.7, 1.5, 1.7, 1…
## $ oxycontin_frequency <chr> "24.5", "41.0", "4.5", "3.0", "4.0", "6.0", "7…
## $ tranquilizer_use <dbl> 0.2, 0.3, 0.9, 2.0, 2.4, 3.5, 4.9, 4.2, 5.4, 3…
## $ tranquilizer_frequency <dbl> 52.0, 25.5, 5.0, 4.5, 11.0, 7.0, 12.0, 4.5, 10…
## $ stimulant_use <dbl> 0.2, 0.3, 0.8, 1.5, 1.8, 2.8, 3.0, 3.3, 4.0, 4…
## $ stimulant_frequency <dbl> 2.0, 4.0, 12.0, 6.0, 9.5, 9.0, 8.0, 6.0, 12.0,…
## $ meth_use <dbl> 0.0, 0.1, 0.1, 0.3, 0.3, 0.6, 0.5, 0.4, 0.9, 0…
## $ meth_frequency <chr> "-", "5.0", "24.0", "10.5", "36.0", "48.0", "1…
## $ sedative_use <dbl> 0.2, 0.1, 0.2, 0.4, 0.2, 0.5, 0.4, 0.3, 0.5, 0…
## $ sedative_frequency <dbl> 13.0, 19.0, 16.5, 30.0, 3.0, 6.5, 10.0, 6.0, 4…
Extract a subset of the data frame containing only the age, n, alcohol_use, and marijuana_use columns.
# Narrow the table to the four columns used in the rest of the analysis,
# then print the result.
kept_columns <- c("age", "n", "alcohol_use", "marijuana_use")
my_data1 <- my_data[, kept_columns]
as.data.frame(my_data1)
## age n alcohol_use marijuana_use
## 1 12 2798 3.9 1.1
## 2 13 2757 8.5 3.4
## 3 14 2792 18.1 8.7
## 4 15 2956 29.2 14.5
## 5 16 3058 40.1 22.5
## 6 17 3038 49.3 28.0
## 7 18 2469 58.7 33.7
## 8 19 2223 64.6 33.4
## 9 20 2271 69.7 34.0
## 10 21 2354 83.2 33.0
## 11 22-23 4707 84.2 28.4
## 12 24-25 4591 83.1 24.9
## 13 26-29 2628 80.7 20.8
## 14 30-34 2864 77.5 16.4
## 15 35-49 7391 75.0 10.4
## 16 50-64 3923 67.2 7.3
## 17 65+ 2448 49.3 1.2
Extract only the row for the 50-64 age group, which corresponds to the baby boomer generation.
# Keep the single row whose age label is exactly "50-64".
boomers_50_64 <- dplyr::filter(my_data1, age == "50-64")
Filter all rows for ages below 21 and assign them to the underAgeDrugUse variable.
# Select the rows for single-year ages strictly below 21.
#
# `age` is a character column mixing single years ("12") with range labels
# ("22-23", "65+"). Comparing it directly against a number makes R coerce the
# number to a string and compare lexicographically, which is only right by
# accident. Instead, keep the purely numeric labels first and then compare
# them as numbers. Filtering in two steps means as.numeric() never sees a
# range label, so no coercion warnings are raised.
#
# The cut-off is `< 21` so that this subset matches the "Below 21" group used
# in the age-group comparison further down.
underAgeDrugUse <- my_data1 %>%
  filter(grepl("^[0-9]+$", age)) %>%
  filter(as.numeric(age) < 21)
underAgeDrugUse
## age n alcohol_use marijuana_use
## 1 12 2798 3.9 1.1
## 2 13 2757 8.5 3.4
## 3 14 2792 18.1 8.7
## 4 15 2956 29.2 14.5
## 5 16 3058 40.1 22.5
## 6 17 3038 49.3 28.0
## 7 18 2469 58.7 33.7
## 8 19 2223 64.6 33.4
## 9 20 2271 69.7 34.0
Display the baby boomers row.
# Auto-print the data frame holding the rows whose age label is "50-64".
boomers_50_64
## age n alcohol_use marijuana_use
## 1 50-64 3923 67.2 7.3
Compare alcohol and marijuana use between the two age groups.
# Build a two-row summary comparing the "Below 21" group with the "50-64"
# group on alcohol and marijuana use.
#
# - "Below 21": unweighted mean of the per-age values over the single-year
#   age rows whose numeric age is < 21.
# - "50-64":    the value from the single "50-64" row.
#
# `age` is a character column ("12", ..., "22-23", "65+"), so it must not be
# compared to a number directly: that would be a string comparison whose
# result depends on collation. The masks below parse only the purely numeric
# labels; range labels are never treated as "below 21".
drug_comparison <- local({
  is_single_year <- grepl("^[0-9]+$", my_data1$age)
  is_under_21 <- is_single_year
  is_under_21[is_single_year] <- as.numeric(my_data1$age[is_single_year]) < 21
  is_boomer <- !is.na(my_data1$age) & my_data1$age == "50-64"

  # Fail loudly instead of producing NaN or a mis-shaped data frame.
  stopifnot(any(is_under_21), sum(is_boomer) == 1L)

  data.frame(
    age_group = c("Below 21", "50-64"),
    alcohol_use = c(
      mean(my_data1$alcohol_use[is_under_21], na.rm = TRUE),
      my_data1$alcohol_use[is_boomer]
    ),
    marijuana_use = c(
      mean(my_data1$marijuana_use[is_under_21], na.rm = TRUE),
      my_data1$marijuana_use[is_boomer]
    )
  )
})
drug_comparison
## age_group alcohol_use marijuana_use
## 1 Below 21 38.01111 19.92222
## 2 50-64 67.20000 7.30000
Bar plot for alcohol use.
# Bar chart of alcohol use, one bar per age group, with bar height taken
# directly from the `alcohol_use` column.
ggplot(
  data = drug_comparison,
  mapping = aes(x = age_group, y = alcohol_use, fill = age_group)
) +
  geom_col(position = "dodge") +
  ggtitle("Comparison of Alcohol Use Between Age Groups") +
  xlab("Age Group") +
  ylab("Mean Alcohol Use") +
  theme_minimal()
Bar plot for marijuana use.
# Bar chart of marijuana use, one bar per age group, with bar height taken
# directly from the `marijuana_use` column.
ggplot(
  data = drug_comparison,
  mapping = aes(x = age_group, y = marijuana_use, fill = age_group)
) +
  geom_col(position = "dodge") +
  ggtitle("Comparison of Marijuana Use Between Age Groups") +
  xlab("Age Group") +
  ylab("Mean Marijuana Use") +
  theme_minimal()
In summary, comparing alcohol and marijuana use between individuals under 21 and the baby boomer generation (the 50-64 age group) reveals that, on average, those under 21 report higher marijuana use, while baby boomers report higher alcohol use. These findings emphasize the need for age-specific insights when shaping interventions and policies. Given the limitations of mean values, further analysis may be required for a comprehensive understanding of drug use patterns across age groups. Additionally, exploring the relationship between the baby boomer generation and alcohol use may be a subject for future study.