Paul Kim

Soc 712

Prof. Song

Research Question: Among people who have used heroin, what is the race difference between Whites and Blacks, what is the sex difference, and what is the trend across personal income?

Hypothesis: Whites will have a higher chance of using heroin than Blacks. Men will have a higher chance of using heroin than women. As income increases, the chance of using heroin will increase.

Dataset: This dataset is from Brett’s data which he posted on data.world. This dataset covers drug use and demographic information.

The analysis of the data shows that Whites have a lower chance of using heroin than Blacks. The graph for hIncome shows the effect of personal income on heroin use: as personal income increases, the chance of using heroin decreases. The fd variable holds the first difference between males and females; this first difference is the sex difference. The sex difference for hSex is -0.01869674. This means that the predicted probability of using heroin is 0.01869674 lower (about 1.9 percentage points) for females than for males. So females have a lower chance of using heroin than males. The plots of hSex show the predicted and expected values for males and females, which are then used to find the first difference. hWhite and hBlack show the sex differences among Whites and Blacks in the chance of using heroin. The sex difference for Black males and females is -0.009181576, which means that Black females have a lower chance of using heroin than Black males. The sex difference for White males and females is -0.01854957, which means that White females have a lower chance of using heroin than White males. The ggplot2 graphs show the distributions of these simulated differences for Whites and Blacks. Looking at the mean, or center, of each cluster, the graphs are read here as showing that Whites have a lower chance of using heroin than Blacks. (Note that the histograms plot the female-versus-male first difference within each race, so strictly they compare the size of the sex gap in each group rather than each group's overall chance of using heroin.)

library(Zelig)
## Loading required package: survival
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(sjmisc)
library(radiant.data)
## Loading required package: magrittr
## Loading required package: ggplot2
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: tidyr
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
## The following object is masked from 'package:sjmisc':
## 
##     replace_na
## 
## Attaching package: 'radiant.data'
## The following objects are masked from 'package:lubridate':
## 
##     month, wday
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
## The following objects are masked from 'package:sjmisc':
## 
##     center, is_empty
## The following object is masked from 'package:dplyr':
## 
##     mutate_each
library(tidyr)
library(readr)
library(ggplot2)
# Read the NSDUH workforce-adults CSV into `drugData`.
# NOTE(review): this is an absolute path on the author's machine, so the
# script runs unchanged only where this exact file location exists.
drugData <- read_csv(
  file = "/Users/paulkim/Downloads/balexturner-drug-use-employment-work-absence-income-race-education/data/nsduh_workforce_adults.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   column_a = col_integer(),
##   irpinc3 = col_integer(),
##   irfamin3 = col_integer(),
##   countofdrugs_ever = col_integer(),
##   countofdrugs_month = col_integer(),
##   countofdrugs_year = col_integer(),
##   personalincome = col_integer(),
##   familyincome = col_integer(),
##   questid2 = col_integer(),
##   employmentstatus = col_integer(),
##   preemploymentdrugtest = col_integer(),
##   randomdrugtest = col_integer(),
##   everdrugtest = col_integer(),
##   race_num = col_integer(),
##   education = col_integer(),
##   wouldworkfordrugtester = col_integer(),
##   selectiveleave = col_integer(),
##   skipsick = col_integer(),
##   sex = col_integer()
## )
## See spec(...) for full column specifications.
# Show the first rows of the raw import.
head(drugData)
library(dplyr)

# Recode the raw columns into the variables used by the models:
#   heroin_ever -> 1 when the raw value is "true", otherwise 0
#   race_str    -> factor whose first level is "White"; any value not in the
#                  list below becomes NA
#   sex         -> factor labelled "Male" for raw code 1, "Female" otherwise
heroin <- mutate(
  drugData,
  heroin_ever = ifelse(heroin_ever == "true", 1, 0),
  race_str = factor(
    race_str,
    levels = c(
      "White", "Hispanic", "Asian", "Black/African American",
      "Native American/Alaskan Native", "Hawaiian/Pacific Islander", "Mixed"
    )
  ),
  sex = as.factor(ifelse(sex == 1, "Male", "Female"))
)

# Listwise deletion: na.omit() removes every row that has an NA in ANY
# column of the data, not only in the columns used by the models.
# NOTE(review): confirm that dropping rows for NAs in unrelated columns is
# intended.
heroin1 <- na.omit(heroin)

Personal Income Effect

# Personal-income effect on the probability of ever having used heroin.
# heroin_ever is coded 0/1, so this is fitted as a logit, matching the hSex,
# hWhite and hBlack models of the same formula. Least squares can produce
# expected values outside [0, 1], which cannot be read as a "chance".
hIncome <- zlogit$new()
hIncome$zelig(heroin_ever ~ race_str + sex * personalincome, data = heroin1)
hIncome$setx(sex = "Male")
hIncome$setx1(sex = "Female")
# Scenarios at personal income values 1 through 7; these feed the plot below.
# NOTE(review): setrange() is given no sex value, so the Male/Female settings
# above presumably do not carry into the plotted range -- confirm.
hIncome$setrange(personalincome = 1:7)
# Fix the RNG state before simulating so the plotted intervals are
# reproducible from run to run.
set.seed(712)
hIncome$sim()
ci.plot(hIncome)

Sex Difference

# Sex difference: logit model of ever having used heroin, comparing a Male
# scenario (x) with a Female scenario (x1). Covariates not named in
# setx()/setx1() are left at Zelig's defaults.
hSex <- zlogit$new()
hSex$zelig(heroin_ever ~ race_str + sex * personalincome, data = heroin1)
hSex$setx(sex = "Male")
hSex$setx1(sex = "Female")
# Fix the RNG state before simulating. Without a seed every run draws new
# simulations, so any figure quoted in the write-up cannot be regenerated
# exactly.
set.seed(712)
hSex$sim()
# Simulated first differences for the x1 (Female) scenario relative to the
# x (Male) scenario.
fd <- hSex$get_qi(xvalue = "x1", qi = "fd")
summary(fd)
# The output below was recorded from the original, unseeded run.
##        V1          
##  Min.   :-0.02655  
##  1st Qu.:-0.01997  
##  Median :-0.01858  
##  Mean   :-0.01854  
##  3rd Qu.:-0.01711  
##  Max.   :-0.01218
plot(hSex)

# Sex difference among Whites: same logit model, with race held at "White"
# in both the Male (x) and Female (x1) scenarios.
hWhite <- zlogit$new()
hWhite$zelig(heroin_ever ~ race_str + sex * personalincome, data = heroin1)
hWhite$setx(sex = "Male", race_str = "White")
hWhite$setx1(sex = "Female", race_str = "White")
# Fix the RNG state before simulating so the simulated first differences are
# reproducible from run to run.
set.seed(712)
hWhite$sim()
plot(hWhite)

# Sex difference among Blacks: same logit model, with race held at
# "Black/African American" in both the Male (x) and Female (x1) scenarios.
hBlack <- zlogit$new()
hBlack$zelig(heroin_ever ~ race_str + sex * personalincome, data = heroin1)
hBlack$setx(sex = "Male", race_str = "Black/African American")
hBlack$setx1(sex = "Female", race_str = "Black/African American")
# Fix the RNG state before simulating so the simulated first differences are
# reproducible from run to run.
set.seed(712)
hBlack$sim()
plot(hBlack)

# Simulated first differences (Female scenario relative to Male scenario)
# taken from the White and Black models.
d1 <- hWhite$get_qi(xvalue = "x1", qi = "fd")
d2 <- hBlack$get_qi(xvalue = "x1", qi = "fd")

# Name the columns after the race each one belongs to. Binding the two
# unnamed one-column results with cbind() would label them V1 and V2, so the
# summary table and the facets would not say which race is which.
dfd <- data.frame(White = as.vector(d1), Black = as.vector(d2))
head(dfd)

# Long format: `class` holds the race label, `simv` the simulated value.
# factor_key = TRUE keeps the column order (White, Black) for the facets.
tidd <- dfd %>%
  gather(key = "class", value = "simv", factor_key = TRUE)

# Mean and standard deviation of the simulated differences for each race.
tidd %>%
  group_by(class) %>%
  summarise(mean = mean(simv), sd = sd(simv))

# One histogram per race. bins = 30 is the value ggplot2 would otherwise
# pick by default while emitting a message; stating it keeps the same plot
# without the message.
ggplot(tidd, aes(simv)) +
  geom_histogram(bins = 30) +
  facet_grid(~class)