library(tidyverse)
library(haven)
library(ggthemes)
# Manual fill palette (17 named R colours) passed to scale_fill_manual() in the
# charts of this analysis.
ggplot_scale <- c(
  "navyblue", "tan4", "chartreuse4", "blueviolet", "blue", "deeppink4",
  "goldenrod", "violetred", "turquoise2", "lightgreen", "lightpink1",
  "yellow1", "slategrey", "peachpuff2", "mediumorchid4",
  "mediumspringgreen", "tomato"
)

Assignment

Use the complete (and cleaned) dataset of “Teen Drug Use” to answer the following questions: (you can use the same Word doc as your 200 word paragraph, above).

# Load the SPSS (.sav) survey file. The file name contains plain spaces; they
# must be written literally because `\ ` is not a valid escape sequence in an R
# string and would make this line fail to parse.
teen_druggies <- read_sav("data/teen drug use.sav")

# Store `survey` as a factor so it is treated as a categorical key rather than
# a number (presumably one value per respondent -- confirm against codebook).
teen_druggies$survey <- as.factor(teen_druggies$survey)
  1. How many Latinos were in this group?
# Number of rows whose Latino indicator (`ethnicla`) is coded 1.
teen_druggies %>%
  filter(ethnicla == 1) %>%
  nrow()
## [1] 31
  1. What percentage of the sample reflected African Americans?
# Rows whose African American indicator (`ethnicaa`) is coded 1.
afros <- filter(teen_druggies, ethnicaa == 1)

# Their share of the whole sample, as a percentage rounded to one decimal.
afros_pct <- 100 * (nrow(afros) / nrow(teen_druggies))
paste0(round(afros_pct, 1), "%")
## [1] "18.3%"
  1. What was the most commonly used drug? What percentage of teens had used it in the last year?
# One-row table holding, for each substance, the number of rows coded 1.
# Counting `== 1` directly (with na.rm = TRUE) keeps missing answers and any
# other codes from distorting the totals, which recoding 2 -> 0 and summing the
# raw values could not guarantee.
drug_totals <- teen_druggies %>%
  select(
    alcohol, heroin, marijuan,
    cocaine, ecstasy, LSD,
    poppers, crack, ghb,
    crystal_meth
  ) %>%
  summarize(across(everything(), ~ sum(.x == 1, na.rm = TRUE)))

# Name and position of the substance with the largest count.
which.max(drug_totals)
## alcohol 
##       1
# Largest per-substance count as a whole-number percentage of all rows.
top_drug_pct <- max(drug_totals) / nrow(teen_druggies) * 100
paste0(round(top_drug_pct), "%")
## [1] "79%"
  1. What percentage of teens reported having ever been to college?
# Number of rows with `school` coded 8, then that count as a whole-number
# percentage of the sample.
collegiate <- nrow(filter(teen_druggies, school == 8))

collegiate_pct <- collegiate / nrow(teen_druggies) * 100
paste0(round(collegiate_pct), "%")
## [1] "4%"
  1. Do basic math to figure out the following number: What percentage thought it was either “unlikely” or “very unlikely” that they were infected with the Hepatitis C virus?
# Number of rows whose `hc_likel` answer is coded 4 or 5 (taken here to be the
# "unlikely" / "very unlikely" answers -- confirm against the codebook).
# `%in%` never yields NA, so missing answers are simply not counted.
hepcats <- sum(teen_druggies$hc_likel %in% c(4, 5))

# Share of the whole sample, as a whole-number percentage.
hepcats_pct <- hepcats / nrow(teen_druggies) * 100
paste0(round(hepcats_pct), "%")
## [1] "39%"
  1. Do a series of pie-charts/ graphs of “Groups of Cases,” putting your race/ ethnicity variables in one at a time to represent the “Slices” (figure out why you can’t accurately represent ethnicity in a single pie-chart). Include the pie charts in your assignment.

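Rather than using pie charts, I used a faceted bar plot, because some respondents identified with more than one ethnicity. A pie chart assumes mutually exclusive slices that add up to 100%, so respondents who fall into several ethnicity categories would be counted more than once and distort the proportions.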

# Display labels for the indicator columns. A column value of 1 is turned into
# the label; every other value becomes NA.
substance_labels <- c(
  alcohol = "Alcohol", heroin = "Heroin", marijuan = "Marijuana",
  cocaine = "Cocaine", ecstasy = "Ecstasy", LSD = "LSD",
  poppers = "Poppers", crack = "Crack", ghb = "GHB",
  crystal_meth = "Crystal Meth"
)
ethnicity_labels <- c(
  ethnicaa = "Afro", ethnicna = "Native", ethnicas = "Asian",
  ethnicla = "Latinx", ethnicot = "Other", ethnican = "White"
)
indicator_labels <- c(substance_labels, ethnicity_labels)

# Long table with one row per survey x substance x ethnicity combination.
# `ethn_count` is the number of distinct ethnicities recorded for each `survey`
# value.
groups_of_cases <- teen_druggies %>%
  select(survey, school, all_of(names(indicator_labels))) %>%
  # Recode every indicator column in one pass instead of sixteen copies.
  mutate(across(
    all_of(names(indicator_labels)),
    ~ if_else(.x == 1, indicator_labels[[cur_column()]], NA_character_)
  )) %>%
  # Stack the substance columns; rows without that substance are dropped.
  pivot_longer(
    all_of(names(substance_labels)),
    names_to = "drug_col",
    values_to = "substance",
    values_drop_na = TRUE
  ) %>%
  # Stack the ethnicity columns the same way, crossing them with substances.
  pivot_longer(
    all_of(names(ethnicity_labels)),
    names_to = "ethn_col",
    values_to = "ethnicity",
    values_drop_na = TRUE
  ) %>%
  select(survey, substance, ethnicity, school) %>%
  group_by(survey) %>%
  mutate(ethn_count = n_distinct(ethnicity)) %>%
  # Drop the grouping so later steps do not silently operate per survey.
  ungroup()
# Horizontal stacked bars: one panel per ethnicity, one bar per substance,
# coloured by the number of ethnicities recorded for the survey (ethn_count).
groups_of_cases %>%
  ggplot(aes(x = substance, fill = as.factor(ethn_count))) +
  geom_bar(position = "stack") +
  facet_wrap(~ ethnicity, ncol = 1) +
  scale_fill_manual(values = ggplot_scale) +
  coord_flip() +
  theme_classic()

  1. Do a simple bar chart of levels of education, putting “highest level of education completed” as the Category axis.
# Lookup table from the numeric `school` codes to readable labels, supplied as
# inline CSV text.
edu_levels <- read_csv("
1,< 8th grade
2,8th grade
3,9th grade
4,10th grade
5,11th grade
6,12th grade
7,GED
8,Some College
777,Unknown
888,Refused
", col_names = c("school", "edu_level")) %>%
  # Keep the labels in code order so the chart axis follows the education
  # levels instead of sorting alphabetically ("10th grade" before "8th grade").
  mutate(edu_level = factor(edu_level, levels = unique(edu_level)))

# groups_of_cases holds one row per survey x substance x ethnicity, so counting
# its rows would count each survey once per substance. Collapse to one row per
# survey x ethnicity before attaching the education label.
# NOTE(review): surveys with no substance coded 1 are absent from
# groups_of_cases and therefore from this table as well.
highest_edu <- groups_of_cases %>%
  ungroup() %>%
  distinct(survey, school, ethnicity, ethn_count) %>%
  inner_join(edu_levels)
## Joining, by = "school"
# Horizontal bar chart of education level, with bars stacked by ethnicity.
highest_edu %>%
  ggplot(aes(x = edu_level, fill = ethnicity)) +
  geom_bar(position = "stack") +
  scale_fill_manual(values = ggplot_scale) +
  coord_flip() +
  theme_classic()

Cut and paste these charts into your Word doc to turn in.