Lewis-beck et al. 追試
政策変数を作成
#------set policy variables
library(mixtools)
set.seed(123) # set seed
n <- 1000 # Sample size
weights <- c(0.5, 0.5) # weights for each distribution
means <- c(2.5, 6.5) # means of each distribution
sds <- c(0.5, 0.5) # st.d of each distribution
# random samples
policy <- numeric(n)
for (i in 1:n) {
if (runif(1) < weights[1]) {
policy[i] <- rnorm(1, mean = means[1], sd = sds[1])
} else {
policy[i] <- rnorm(1, mean = means[2], sd = sds[2])
}
}
# Visualize with a histogram
hist(policy, breaks=30, col='lightblue', main='Histogram of Bimodal Distribution', xlab='X', border='white')PID変数とvote変数を作成
#-------set PID and vote variables and combine 3 variables
set.seed(123) # set seed
# Probability that dem_pid is 1 when policy is 4 or more is 0.8, otherwise it is 0.2
dem_pid <- ifelse(policy >= 4, rbinom(length(policy), 1, 0.8), rbinom(length(policy), 1, 0.2))
# Probability that dem_vote is 1 when policy is 4 or more is 0.8, otherwise it is 0.2
dem_vote <- ifelse(policy >= 4, rbinom(length(policy), 1, 0.8), rbinom(length(policy), 1, 0.2))
# Combine into a dataframe
df <- data.frame(policy, dem_pid, dem_vote)
# Display the head of the dataframe to check the content
head(df)## policy dem_pid dem_vote
## 1 2.900277 0 0
## 2 7.279354 1 1
## 3 7.119748 1 1
## 4 3.357532 1 0
## 5 6.591541 0 1
## 6 6.156574 1 1
各種推定を行う。最初に平均値の差の確認。
#--------estimation
### (1) mean comparison
# Calculate the mean of dem_vote by dem_pid
group_means <- aggregate(dem_vote ~ dem_pid, data = df, mean)
print(group_means)## dem_pid dem_vote
## 1 0 0.3069498
## 2 1 0.7157676
## [1] 0.4088178
図でも確認。
#--------estimation
### (1) mean comparison
library(ggplot2)
library(ggsignif)
# Create a graph using ggplot2
p <- ggplot(df, aes(x = factor(dem_pid), y = dem_vote, group = dem_pid)) +
geom_bar(stat = "summary", fun = "mean", fill = c("grey", "black"), position = position_dodge(0.8), width = 0.7) +
geom_errorbar(stat = "summary", fun.data = mean_se, width = 0.2, position = position_dodge(0.8)) +
labs(x = "dem_pid Group", y = "Average of dem_vote", title = "Comparison of dem_vote by dem_pid Groups") +
theme_minimal()
# Add statistical significance using geom_signif
p + geom_signif(comparisons = list(c("0", "1")), map_signif_level = TRUE)続いてOLS。
##
## Call:
## lm(formula = dem_vote ~ dem_pid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7158 -0.3070 0.2842 0.2842 0.6931
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.30695 0.02007 15.29 <2e-16 ***
## dem_pid 0.40882 0.02891 14.14 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4568 on 998 degrees of freedom
## Multiple R-squared: 0.1669, Adjusted R-squared: 0.1661
## F-statistic: 200 on 1 and 998 DF, p-value: < 2.2e-16
## dem_pid
## 0.4088178
最後にlogit。
### (3) Logit
library(Zelig)
zpar <- zelig(dem_vote ~ dem_pid, df, model="logit", cite = F) #logistic estimation
x.notdem <- setx(zpar, dem_pid = 0) #set PID = 0
x.dem <- setx(zpar, dem_pid = 1 ) #set PID = 1
s.out <- sim(zpar, x = x.notdem, x1 = x.dem) #simulate 1000 samples
sim_rul <- as.data.frame(s.out[["sim.out"]][["x1"]][["fd"]][[1]]) #extract the first difference between dem_pid = 1 and dem_pid = 0
sim_rul_fd <- quantile(sim_rul[,1], c(0.5, .025, .975)) # identify 50% value as the first difference and 2.5% and 97.5% values as confidence intervals
logit_est <- sim_rul_fd[1]
print(logit_est)## 50%
## 0.4073414
3種類の推定結果を並べて表示。3つの推定値がほぼ近似しているのが確かめられる。
### combine three results
data.frame(estimates = c("Mean Difference", "OLS", "Logit"), values = c(diff_mean, ols_est, logit_est ))## estimates values
## Mean Difference 0.4088178
## dem_pid OLS 0.4088178
## 50% Logit 0.4073414