RENAME VARIABLES

Possible Questions:

Drug use among gender
Drug use among the states, race, household incomes

DRUGS TO LOOK AT:

opioids. Look at each drug (nonmedical) and see which one is interesting

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(dplyr)
library(usmap)

US19 <- read.csv("~/Desktop/US/nmurx_us_19Q1.csv", header=TRUE)
#View(US19)

#uS19 <- na.omit(US19)
#view(US19)

GENDER

F <- US19 %>%
  filter(DEM_GENDER == 2) %>%
  group_by(DEM_STATE)

M <- US19 %>%
  filter(DEM_GENDER == 1)

ggplot(US19, aes(x = OP_USE, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
  geom_boxplot() +
  facet_grid(.~DEM_REGION) +
  labs(x= "Opioid Overall Use", y = "Demographic Age", 
       fill = 'Demographic Gender',
       caption = "Faceted by Region",
       title=paste("Boxplot of Overall Opiod Use"))

ggplot(US19, aes(x = OP_USE, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
  geom_boxplot() +
  facet_grid(.~DEM_GENDER)

BENZHYDROCODONE (BHYD_NMU)

mod1 <- lm(BHYD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod1)

## 
## Call:
## lm(formula = BHYD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), 
##     data = US19)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44346 -0.27479 -0.18281 -0.02185  0.92415 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.548899   0.064131   8.559  < 2e-16 ***
## DEM_AGE                -0.005858   0.001256  -4.662 3.89e-06 ***
## as.factor(DEM_REGION)2 -0.073214   0.057252  -1.279  0.20148    
## as.factor(DEM_REGION)3 -0.046942   0.051345  -0.914  0.36097    
## as.factor(DEM_REGION)4 -0.088155   0.053686  -1.642  0.10113    
## as.factor(DEM_GENDER)2 -0.092013   0.035549  -2.588  0.00989 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4133 on 576 degrees of freedom
##   (29291 observations deleted due to missingness)
## Multiple R-squared:  0.05103,    Adjusted R-squared:  0.04279 
## F-statistic: 6.195 on 5 and 576 DF,  p-value: 1.317e-05

ggplot(US19, aes(x = BHYD_NMU, fill = as.factor(DEM_REGION))) +
  geom_bar() +
  facet_grid(.~DEM_INCOME)

## Warning: Removed 29291 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = BHYD_NMU, fill = as.factor(DEM_GENHEALTH))) +
  geom_bar() +
  facet_grid(.~DEM_REGION)

## Warning: Removed 29291 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = BHYD_NMU, fill = as.factor(DEM_GENDER))) +
  geom_bar()

## Warning: Removed 29291 rows containing non-finite values (stat_count).

BUPRENORPHINE (BUP_NMU)

mod2 <- lm(BUP_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod2)

## 
## Call:
## lm(formula = BUP_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), 
##     data = US19)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5192 -0.3418 -0.2069  0.5703  1.0067 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.635923   0.062025  10.253  < 2e-16 ***
## DEM_AGE                -0.008913   0.001200  -7.427 2.76e-13 ***
## as.factor(DEM_REGION)2  0.057611   0.053096   1.085    0.278    
## as.factor(DEM_REGION)3  0.031471   0.044413   0.709    0.479    
## as.factor(DEM_REGION)4  0.043731   0.049135   0.890    0.374    
## as.factor(DEM_GENDER)2 -0.040949   0.030934  -1.324    0.186    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4448 on 831 degrees of freedom
##   (29036 observations deleted due to missingness)
## Multiple R-squared:  0.06659,    Adjusted R-squared:  0.06097 
## F-statistic: 11.86 on 5 and 831 DF,  p-value: 4.253e-11

ggplot(US19, aes(x = BUP_NMU, fill = as.factor(DEM_GENDER))) +
  geom_bar()

## Warning: Removed 29036 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = BUP_NMU, fill = as.factor(DEM_REGION))) +
  geom_bar() +
  facet_grid(.~DEM_INCOME)

## Warning: Removed 29036 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = BUP_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
  geom_boxplot()

## Warning: Removed 29036 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = BUP_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
  geom_boxplot()

## Warning: Removed 29036 rows containing missing values (stat_boxplot).

CODEINE (COD_NMU)

mod3 <- lm(COD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod3)

## 
## Call:
## lm(formula = COD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), 
##     data = US19)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37155 -0.16879 -0.09424 -0.02387  1.05365 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.452412   0.013046  34.679   <2e-16 ***
## DEM_AGE                -0.005296   0.000195 -27.155   <2e-16 ***
## as.factor(DEM_REGION)2 -0.024528   0.009456  -2.594   0.0095 ** 
## as.factor(DEM_REGION)3  0.005711   0.008611   0.663   0.5072    
## as.factor(DEM_REGION)4  0.019763   0.009326   2.119   0.0341 *  
## as.factor(DEM_GENDER)2 -0.080956   0.006014 -13.462   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3228 on 11766 degrees of freedom
##   (18101 observations deleted due to missingness)
## Multiple R-squared:  0.07326,    Adjusted R-squared:  0.07287 
## F-statistic:   186 on 5 and 11766 DF,  p-value: < 2.2e-16

ggplot(US19, aes(x = COD_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
  geom_boxplot() +
  labs(x="Codiene Non-Medical Use", y = "Demographic Age", 
       fill = 'Demographic Region',
       title=paste("Boxplot of Codeiene Non-Medical Use Among Regions"))

## Warning: Removed 18101 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = COD_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
  geom_boxplot()

## Warning: Removed 18101 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = COD_NMU, fill = as.factor(DEM_REGION))) +
  geom_bar() +
  facet_grid(.~DEM_INCOME) +
  labs(x="Codiene Non-Medical Use", y = "Count", 
       caption = "Faceted by the Demographic Income", 
       fill = 'Demographic Region',
       title=paste("Codeiene Non-Medical Use Among Regions"))

## Warning: Removed 18101 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = COD_NMU, fill = as.factor(DEM_GENDER))) +
  geom_bar()

## Warning: Removed 18101 rows containing non-finite values (stat_count).

DIHYDROCODEINE (DIHY_NMU)

mod4 <- lm(DIHY_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod4)

## 
## Call:
## lm(formula = DIHY_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), 
##     data = US19)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53952 -0.25751 -0.17141 -0.00255  0.95034 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.634337   0.066299   9.568  < 2e-16 ***
## DEM_AGE                -0.005268   0.001245  -4.232 2.76e-05 ***
## as.factor(DEM_REGION)2 -0.137314   0.065150  -2.108 0.035569 *  
## as.factor(DEM_REGION)3 -0.197729   0.055292  -3.576 0.000383 ***
## as.factor(DEM_REGION)4 -0.225890   0.059066  -3.824 0.000148 ***
## as.factor(DEM_GENDER)2 -0.069069   0.037578  -1.838 0.066668 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4072 on 489 degrees of freedom
##   (29378 observations deleted due to missingness)
## Multiple R-squared:  0.0814, Adjusted R-squared:  0.07201 
## F-statistic: 8.667 on 5 and 489 DF,  p-value: 6.948e-08

ggplot(US19, aes(x = DIHY_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
  geom_boxplot() + 
  labs(x="Dihydrocodiene Non-Medical Use", y = "Demographic Age", 
       fill = 'Demographic Region',
       title=paste("Boxplot of Dihydroodeiene Non-Medical Use Among Regions"))

## Warning: Removed 29378 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = DIHY_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
  geom_boxplot()

## Warning: Removed 29378 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = DIHY_NMU, fill = as.factor(DEM_REGION))) +
  geom_bar() +
  facet_grid(.~DEM_INCOME) +
  labs(x="Dihydrocodiene Non-Medical Use", y = "Count", 
      caption = "Faceted by the Demographic Income", 
      fill = 'Demographic Region',
      title=paste("Dihydrocodeiene Non-Medical Use Among Regions"))

## Warning: Removed 29378 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = DIHY_NMU, fill = as.factor(DEM_GENDER))) +
  geom_bar()

## Warning: Removed 29378 rows containing non-finite values (stat_count).

ELUXADOLINE (ELU_NMU)

mod5 <- lm(ELU_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod5)

## 
## Call:
## lm(formula = ELU_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), 
##     data = US19)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6222 -0.4147 -0.2207  0.5273  0.8272 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             0.659350   0.116065   5.681 3.98e-08 ***
## DEM_AGE                -0.006267   0.002644  -2.371  0.01858 *  
## as.factor(DEM_REGION)2  0.081973   0.101652   0.806  0.42083    
## as.factor(DEM_REGION)3 -0.036244   0.084657  -0.428  0.66895    
## as.factor(DEM_REGION)4 -0.025324   0.095592  -0.265  0.79131    
## as.factor(DEM_GENDER)2 -0.199577   0.065076  -3.067  0.00242 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4708 on 233 degrees of freedom
##   (29634 observations deleted due to missingness)
## Multiple R-squared:  0.07542,    Adjusted R-squared:  0.05558 
## F-statistic: 3.801 on 5 and 233 DF,  p-value: 0.002498

ggplot(US19, aes(x = ELU_NMU, fill = as.factor(DEM_REGION))) +
  geom_bar() +
  facet_grid(.~DEM_INCOME)

## Warning: Removed 29634 rows containing non-finite values (stat_count).

ggplot(US19, aes(x = ELU_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
  geom_boxplot()

## Warning: Removed 29634 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = ELU_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
  geom_boxplot()

## Warning: Removed 29634 rows containing missing values (stat_boxplot).

FENTANYL (FENT_NMU)

ggplot(US19, aes(x = FENT_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
  geom_boxplot()

## Warning: Removed 28454 rows containing missing values (stat_boxplot).

ggplot(US19, aes(x = FENT_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
  geom_boxplot()

## Warning: Removed 28454 rows containing missing values (stat_boxplot).

ASK PROF KITADA FOR HELP

##south <- usmap::plot_usmap("states", include = c('DE', 'DC', 'FL', 'GA', 'MD', 'NC', 'SC', 
  #                                        'VA', 'WV', 'AL', 'KY', 'MS', 'TN', 'AR', 
   #                                      'LA', 'OK', 'TX'), labels = TRUE)

#region <- US19 %>%
#  group_by(DEM_REGION, AMPH_USE) %>%
  #filter(DEM_REGION == 3) %>%
  #count(AMPH_USE, na.rm = TRUE)
#  summarise(nUSE = n(),
 #           nwgtuse = sum(AMPH_USE, na.rm = TRUE))

#regionS<-US19%>%
 # group_by(DEM_REGION, AMPH_USE) %>%
  #filter(DEM_REGION == 3) %>%
#  mutate(n = n(), 
#         nwgt=sum(AMPH_USE, na.rm = TRUE))

#statePropuse<-region%>%
 # filter(DEM_REGION == 3) %>%
 # left_join(regionS)%>%
 # mutate(sampPropuse=nUSE/n, 
 #        wgtPropuse=nwgtuse/nwgt)

#mapPropUSE<- south %>%
 # mutate(DEM_RGGION = full) %>%
 # left_join(statePropuse)

#p<-statePropuse%>%
 # ggplot(aes(x, y, group = group)) +
 # geom_polygon(aes(text=DEM_REGION, fill = wgtPropuse),color="black")+
  #theme_bw()+
  #theme(axis.title.x=element_blank(),
        #axis.text.x=element_blank(),
       # axis.ticks.x=element_blank(),
       # axis.title.y=element_blank(),
        #axis.text.y=element_blank(),
        #axis.ticks.y=element_blank())+
  #labs(x="", y = "", 
    #   caption = "(Based on data from IPUMS)", 
   #    fill = 'Percent',
    #   title=paste("Voter Turn-out is Higher in Presidential Election Years"))+
  #scale_fill_viridis(direction = -1)

#ggplotly(p)

DATA FEST 2021

Kela Iwata & Nikki Seina

4/1/2021

RENAME VARIABLES

Possible Questions:

DRUGS TO LOOK AT:

GENDER

BENZHYDROCODONE (BHYD_NMU)

BUPRENORPHINE (BUP_NMU)

CODEINE (COD_NMU)

DIHYDROCODEINE (DIHY_NMU)

ELUXADOLINE (ELU_NMU)

FENTANYL (FENT_NMU)

ASK PROF KITADA FOR HELP