opioids. Look at each drug (nonmedical) and see which one is interesting
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(usmap)
US19 <- read.csv("~/Desktop/US/nmurx_us_19Q1.csv", header=TRUE)
#View(US19)
#uS19 <- na.omit(US19)
#view(US19)
F <- US19 %>%
filter(DEM_GENDER == 2) %>%
group_by(DEM_STATE)
M <- US19 %>%
filter(DEM_GENDER == 1)
ggplot(US19, aes(x = OP_USE, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
geom_boxplot() +
facet_grid(.~DEM_REGION) +
labs(x= "Opioid Overall Use", y = "Demographic Age",
fill = 'Demographic Gender',
caption = "Faceted by Region",
title=paste("Boxplot of Overall Opiod Use"))
ggplot(US19, aes(x = OP_USE, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
geom_boxplot() +
facet_grid(.~DEM_GENDER)
mod1 <- lm(BHYD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod1)
##
## Call:
## lm(formula = BHYD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER),
## data = US19)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44346 -0.27479 -0.18281 -0.02185 0.92415
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.548899 0.064131 8.559 < 2e-16 ***
## DEM_AGE -0.005858 0.001256 -4.662 3.89e-06 ***
## as.factor(DEM_REGION)2 -0.073214 0.057252 -1.279 0.20148
## as.factor(DEM_REGION)3 -0.046942 0.051345 -0.914 0.36097
## as.factor(DEM_REGION)4 -0.088155 0.053686 -1.642 0.10113
## as.factor(DEM_GENDER)2 -0.092013 0.035549 -2.588 0.00989 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4133 on 576 degrees of freedom
## (29291 observations deleted due to missingness)
## Multiple R-squared: 0.05103, Adjusted R-squared: 0.04279
## F-statistic: 6.195 on 5 and 576 DF, p-value: 1.317e-05
ggplot(US19, aes(x = BHYD_NMU, fill = as.factor(DEM_REGION))) +
geom_bar() +
facet_grid(.~DEM_INCOME)
## Warning: Removed 29291 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = BHYD_NMU, fill = as.factor(DEM_GENHEALTH))) +
geom_bar() +
facet_grid(.~DEM_REGION)
## Warning: Removed 29291 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = BHYD_NMU, fill = as.factor(DEM_GENDER))) +
geom_bar()
## Warning: Removed 29291 rows containing non-finite values (stat_count).
mod2 <- lm(BUP_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod2)
##
## Call:
## lm(formula = BUP_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER),
## data = US19)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5192 -0.3418 -0.2069 0.5703 1.0067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.635923 0.062025 10.253 < 2e-16 ***
## DEM_AGE -0.008913 0.001200 -7.427 2.76e-13 ***
## as.factor(DEM_REGION)2 0.057611 0.053096 1.085 0.278
## as.factor(DEM_REGION)3 0.031471 0.044413 0.709 0.479
## as.factor(DEM_REGION)4 0.043731 0.049135 0.890 0.374
## as.factor(DEM_GENDER)2 -0.040949 0.030934 -1.324 0.186
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4448 on 831 degrees of freedom
## (29036 observations deleted due to missingness)
## Multiple R-squared: 0.06659, Adjusted R-squared: 0.06097
## F-statistic: 11.86 on 5 and 831 DF, p-value: 4.253e-11
ggplot(US19, aes(x = BUP_NMU, fill = as.factor(DEM_GENDER))) +
geom_bar()
## Warning: Removed 29036 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = BUP_NMU, fill = as.factor(DEM_REGION))) +
geom_bar() +
facet_grid(.~DEM_INCOME)
## Warning: Removed 29036 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = BUP_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
geom_boxplot()
## Warning: Removed 29036 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = BUP_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
geom_boxplot()
## Warning: Removed 29036 rows containing missing values (stat_boxplot).
mod3 <- lm(COD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod3)
##
## Call:
## lm(formula = COD_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER),
## data = US19)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37155 -0.16879 -0.09424 -0.02387 1.05365
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.452412 0.013046 34.679 <2e-16 ***
## DEM_AGE -0.005296 0.000195 -27.155 <2e-16 ***
## as.factor(DEM_REGION)2 -0.024528 0.009456 -2.594 0.0095 **
## as.factor(DEM_REGION)3 0.005711 0.008611 0.663 0.5072
## as.factor(DEM_REGION)4 0.019763 0.009326 2.119 0.0341 *
## as.factor(DEM_GENDER)2 -0.080956 0.006014 -13.462 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3228 on 11766 degrees of freedom
## (18101 observations deleted due to missingness)
## Multiple R-squared: 0.07326, Adjusted R-squared: 0.07287
## F-statistic: 186 on 5 and 11766 DF, p-value: < 2.2e-16
ggplot(US19, aes(x = COD_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
geom_boxplot() +
labs(x="Codiene Non-Medical Use", y = "Demographic Age",
fill = 'Demographic Region',
title=paste("Boxplot of Codeiene Non-Medical Use Among Regions"))
## Warning: Removed 18101 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = COD_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
geom_boxplot()
## Warning: Removed 18101 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = COD_NMU, fill = as.factor(DEM_REGION))) +
geom_bar() +
facet_grid(.~DEM_INCOME) +
labs(x="Codiene Non-Medical Use", y = "Count",
caption = "Faceted by the Demographic Income",
fill = 'Demographic Region',
title=paste("Codeiene Non-Medical Use Among Regions"))
## Warning: Removed 18101 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = COD_NMU, fill = as.factor(DEM_GENDER))) +
geom_bar()
## Warning: Removed 18101 rows containing non-finite values (stat_count).
mod4 <- lm(DIHY_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod4)
##
## Call:
## lm(formula = DIHY_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER),
## data = US19)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53952 -0.25751 -0.17141 -0.00255 0.95034
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.634337 0.066299 9.568 < 2e-16 ***
## DEM_AGE -0.005268 0.001245 -4.232 2.76e-05 ***
## as.factor(DEM_REGION)2 -0.137314 0.065150 -2.108 0.035569 *
## as.factor(DEM_REGION)3 -0.197729 0.055292 -3.576 0.000383 ***
## as.factor(DEM_REGION)4 -0.225890 0.059066 -3.824 0.000148 ***
## as.factor(DEM_GENDER)2 -0.069069 0.037578 -1.838 0.066668 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4072 on 489 degrees of freedom
## (29378 observations deleted due to missingness)
## Multiple R-squared: 0.0814, Adjusted R-squared: 0.07201
## F-statistic: 8.667 on 5 and 489 DF, p-value: 6.948e-08
ggplot(US19, aes(x = DIHY_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
geom_boxplot() +
labs(x="Dihydrocodiene Non-Medical Use", y = "Demographic Age",
fill = 'Demographic Region',
title=paste("Boxplot of Dihydroodeiene Non-Medical Use Among Regions"))
## Warning: Removed 29378 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = DIHY_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
geom_boxplot()
## Warning: Removed 29378 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = DIHY_NMU, fill = as.factor(DEM_REGION))) +
geom_bar() +
facet_grid(.~DEM_INCOME) +
labs(x="Dihydrocodiene Non-Medical Use", y = "Count",
caption = "Faceted by the Demographic Income",
fill = 'Demographic Region',
title=paste("Dihydrocodeiene Non-Medical Use Among Regions"))
## Warning: Removed 29378 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = DIHY_NMU, fill = as.factor(DEM_GENDER))) +
geom_bar()
## Warning: Removed 29378 rows containing non-finite values (stat_count).
mod5 <- lm(ELU_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER), US19)
summary(mod5)
##
## Call:
## lm(formula = ELU_NMU ~ DEM_AGE + as.factor(DEM_REGION) + as.factor(DEM_GENDER),
## data = US19)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6222 -0.4147 -0.2207 0.5273 0.8272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.659350 0.116065 5.681 3.98e-08 ***
## DEM_AGE -0.006267 0.002644 -2.371 0.01858 *
## as.factor(DEM_REGION)2 0.081973 0.101652 0.806 0.42083
## as.factor(DEM_REGION)3 -0.036244 0.084657 -0.428 0.66895
## as.factor(DEM_REGION)4 -0.025324 0.095592 -0.265 0.79131
## as.factor(DEM_GENDER)2 -0.199577 0.065076 -3.067 0.00242 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4708 on 233 degrees of freedom
## (29634 observations deleted due to missingness)
## Multiple R-squared: 0.07542, Adjusted R-squared: 0.05558
## F-statistic: 3.801 on 5 and 233 DF, p-value: 0.002498
ggplot(US19, aes(x = ELU_NMU, fill = as.factor(DEM_REGION))) +
geom_bar() +
facet_grid(.~DEM_INCOME)
## Warning: Removed 29634 rows containing non-finite values (stat_count).
ggplot(US19, aes(x = ELU_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
geom_boxplot()
## Warning: Removed 29634 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = ELU_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
geom_boxplot()
## Warning: Removed 29634 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = FENT_NMU, y = DEM_AGE, fill = as.factor(DEM_REGION))) +
geom_boxplot()
## Warning: Removed 28454 rows containing missing values (stat_boxplot).
ggplot(US19, aes(x = FENT_NMU, y = DEM_AGE, fill = as.factor(DEM_GENDER))) +
geom_boxplot()
## Warning: Removed 28454 rows containing missing values (stat_boxplot).
##south <- usmap::plot_usmap("states", include = c('DE', 'DC', 'FL', 'GA', 'MD', 'NC', 'SC',
# 'VA', 'WV', 'AL', 'KY', 'MS', 'TN', 'AR',
# 'LA', 'OK', 'TX'), labels = TRUE)
#region <- US19 %>%
# group_by(DEM_REGION, AMPH_USE) %>%
#filter(DEM_REGION == 3) %>%
#count(AMPH_USE, na.rm = TRUE)
# summarise(nUSE = n(),
# nwgtuse = sum(AMPH_USE, na.rm = TRUE))
#regionS<-US19%>%
# group_by(DEM_REGION, AMPH_USE) %>%
#filter(DEM_REGION == 3) %>%
# mutate(n = n(),
# nwgt=sum(AMPH_USE, na.rm = TRUE))
#statePropuse<-region%>%
# filter(DEM_REGION == 3) %>%
# left_join(regionS)%>%
# mutate(sampPropuse=nUSE/n,
# wgtPropuse=nwgtuse/nwgt)
#mapPropUSE<- south %>%
# mutate(DEM_RGGION = full) %>%
# left_join(statePropuse)
#p<-statePropuse%>%
# ggplot(aes(x, y, group = group)) +
# geom_polygon(aes(text=DEM_REGION, fill = wgtPropuse),color="black")+
#theme_bw()+
#theme(axis.title.x=element_blank(),
#axis.text.x=element_blank(),
# axis.ticks.x=element_blank(),
# axis.title.y=element_blank(),
#axis.text.y=element_blank(),
#axis.ticks.y=element_blank())+
#labs(x="", y = "",
# caption = "(Based on data from IPUMS)",
# fill = 'Percent',
# title=paste("Voter Turn-out is Higher in Presidential Election Years"))+
#scale_fill_viridis(direction = -1)
#ggplotly(p)