Lewis-beck et al. 追試

政策変数を作成

#------set policy variables
library(mixtools)

set.seed(123)  # set seed
n <- 1000  # Sample size
weights <- c(0.5, 0.5)  # weights for each distribution
means <- c(2.5, 6.5)  # means of each distribution
sds <- c(0.5, 0.5)  # st.d of each distribution

# random samples
policy <- numeric(n)
for (i in 1:n) {
  if (runif(1) < weights[1]) {
    policy[i] <- rnorm(1, mean = means[1], sd = sds[1])
  } else {
    policy[i] <- rnorm(1, mean = means[2], sd = sds[2])
  }
}

# Visualize with a histogram
hist(policy, breaks=30, col='lightblue', main='Histogram of Bimodal Distribution', xlab='X', border='white')

PID変数とvote変数を作成

#-------set PID and vote variables and combine 3 variables

set.seed(123)  # set seed

# Probability that dem_pid is 1 when policy is 4 or more is 0.8, otherwise it is 0.2
dem_pid <- ifelse(policy >= 4, rbinom(length(policy), 1, 0.8), rbinom(length(policy), 1, 0.2))

# Probability that dem_vote is 1 when policy is 4 or more is 0.8, otherwise it is 0.2
dem_vote <- ifelse(policy >= 4, rbinom(length(policy), 1, 0.8), rbinom(length(policy), 1, 0.2))

# Combine into a dataframe
df <- data.frame(policy, dem_pid, dem_vote)

# Display the head of the dataframe to check the content
head(df)
##     policy dem_pid dem_vote
## 1 2.900277       0        0
## 2 7.279354       1        1
## 3 7.119748       1        1
## 4 3.357532       1        0
## 5 6.591541       0        1
## 6 6.156574       1        1

各種推定を行う。最初に平均値の差の確認。

#--------estimation

### (1) mean comparison


# Calculate the mean of dem_vote by dem_pid
group_means <- aggregate(dem_vote ~ dem_pid, data = df, mean)
print(group_means)
##   dem_pid  dem_vote
## 1       0 0.3069498
## 2       1 0.7157676
diff_mean = group_means$dem_vote[2] - group_means$dem_vote[1]
print(diff_mean)
## [1] 0.4088178

図でも確認。

#--------estimation

### (1) mean comparison

library(ggplot2)
library(ggsignif)


# Create a graph using ggplot2
p <- ggplot(df, aes(x = factor(dem_pid), y = dem_vote, group = dem_pid)) +
  geom_bar(stat = "summary", fun = "mean", fill = c("grey", "black"), position = position_dodge(0.8), width = 0.7) +
  geom_errorbar(stat = "summary", fun.data = mean_se, width = 0.2, position = position_dodge(0.8)) +
  labs(x = "dem_pid Group", y = "Average of dem_vote", title = "Comparison of dem_vote by dem_pid Groups") +
  theme_minimal()

# Add statistical significance using geom_signif
p + geom_signif(comparisons = list(c("0", "1")), map_signif_level = TRUE)

続いてOLS。

### (2) OLS

result_lm <- lm(dem_vote ~ dem_pid)
summary(result_lm)
## 
## Call:
## lm(formula = dem_vote ~ dem_pid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7158 -0.3070  0.2842  0.2842  0.6931 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.30695    0.02007   15.29   <2e-16 ***
## dem_pid      0.40882    0.02891   14.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4568 on 998 degrees of freedom
## Multiple R-squared:  0.1669, Adjusted R-squared:  0.1661 
## F-statistic:   200 on 1 and 998 DF,  p-value: < 2.2e-16
ols_est <- result_lm$coefficients[2]
print(ols_est)
##   dem_pid 
## 0.4088178

最後にlogit。

### (3) Logit

library(Zelig)

zpar <- zelig(dem_vote ~ dem_pid, df, model="logit", cite = F) #logistic estimation
x.notdem <- setx(zpar, dem_pid = 0) #set PID = 0
x.dem <- setx(zpar, dem_pid = 1 ) #set PID = 1
s.out <- sim(zpar, x = x.notdem, x1 = x.dem) #simulate 1000 samples
sim_rul <- as.data.frame(s.out[["sim.out"]][["x1"]][["fd"]][[1]]) #extract the first difference between dem_pid = 1 and dem_pid = 0
sim_rul_fd <- quantile(sim_rul[,1], c(0.5, .025, .975)) # identify 50% value as the first difference and 2.5% and 97.5% values as confidence intervals
logit_est <- sim_rul_fd[1]

print(logit_est)
##       50% 
## 0.4073414

3種類の推定結果を並べて表示。3つの推定値がほぼ近似しているのが確かめられる。

### combine three results

data.frame(estimates = c("Mean Difference", "OLS", "Logit"), values = c(diff_mean, ols_est, logit_est ))
##               estimates    values
##         Mean Difference 0.4088178
## dem_pid             OLS 0.4088178
## 50%               Logit 0.4073414