Setup environment

load("C:/Users/kevin/Downloads/Civil.Rdata")
Civil <- corruption

Task 1

# Perform simple linear regression
model_simple <- lm(public_sector_corruption ~ polyarchy, data = Civil)
summary(model_simple)

## 
## Call:
## lm(formula = public_sector_corruption ~ polyarchy, data = Civil)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -69.498 -14.334   1.448  16.985  44.436 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 89.44444    3.95373   22.62   <2e-16 ***
## polyarchy   -0.82641    0.06786  -12.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.68 on 166 degrees of freedom
## Multiple R-squared:  0.4718, Adjusted R-squared:  0.4686 
## F-statistic: 148.3 on 1 and 166 DF,  p-value: < 2.2e-16

# Visualize the relationship with a scatter plot and overlay the regression line
ggplot(Civil, aes(x = polyarchy, y = public_sector_corruption)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(x = "Polyarchy", y = "Public Sector Corruption", 
       title = "Relationship between Polyarchy and Public Sector Corruption")

# Create regression table using sjPlot
tab_model(model_simple)

	public_sector_corruption
Predictors	Estimates	CI	p
(Intercept)	89.44	81.64 – 97.25	<0.001
polyarchy	-0.83	-0.96 – -0.69	<0.001
Observations	168
R² / R² adjusted	0.472 / 0.469

Interpretation

Intercept: 73.82
When the polyarchy index is 0, the public sector corruption index is 73.82.
polyarchy: -0.91
For every one unit increase in the polyarchy index, the public sector corruption index decreases by approximately 0.91 units.
R^2: 0.28
This model explains 28% of the variability in public sector corruption.
The relationship between polyarchy and public sector corruption is statistically highly significant.

Task 2

# Extend the model by adding a quadratic term for polyarchy
model_quadratic <- lm(public_sector_corruption ~ polyarchy + I(polyarchy^2), data = Civil)
summary(model_quadratic)

## 
## Call:
## lm(formula = public_sector_corruption ~ polyarchy + I(polyarchy^2), 
##     data = Civil)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.462  -7.475   1.418  14.107  35.187 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    53.116104   7.019792   7.567 2.55e-12 ***
## polyarchy       0.974653   0.305335   3.192  0.00169 ** 
## I(polyarchy^2) -0.017310   0.002874  -6.023 1.08e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.69 on 165 degrees of freedom
## Multiple R-squared:  0.567,  Adjusted R-squared:  0.5618 
## F-statistic:   108 on 2 and 165 DF,  p-value: < 2.2e-16

# Visualize the polynomial relationship
ggplot(Civil, aes(x = polyarchy, y = public_sector_corruption)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), se = FALSE, color = "blue") +
  labs(x = "Polyarchy", y = "Public Sector Corruption", 
       title = "Polynomial Relationship between Polyarchy and Public Sector Corruption")

# Calculate marginal effects of polyarchy at different levels using marginaleffects
library(marginaleffects)
mfx_levels <- data.frame(polyarchy = c(30, 60, 90))
marginal_effects <- marginaleffects(model_quadratic, newdata = mfx_levels)
marginal_effects

## 
##       Term Estimate Std. Error       z Pr(>|z|)     S 2.5 % 97.5 %
##  polyarchy  -0.0639     0.1408  -0.454     0.65   0.6 -0.34  0.212
##  polyarchy  -1.1025     0.0768 -14.351   <0.001 152.7 -1.25 -0.952
##  polyarchy  -2.1411     0.2268  -9.439   <0.001  67.8 -2.59 -1.697
## 
## Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, predicted_lo, predicted_hi, predicted, polyarchy, public_sector_corruption 
## Type:  response

Interpretation

Intercept: 90.67
When the polyarchy index is 0, the public sector corruption index is 90.67.
polyarchy: -1.54
For every one unit increase in the polyarchy index, the public sector corruption index decreases by approximately 1.54 units.
polyarchy^2: 0.012
The inclusion of the quadratic term for the polyarchy index indicates a non-linear relationship.
R^2: 0.41
This model explains 41% of the variability in public sector corruption.
The relationship between polyarchy and public sector corruption is statistically highly significant.

Task 3

# Fit a logistic regression model
model_logit <- glm(disclose_donations ~ public_sector_corruption + log_gdp_percapita, 
                   family = binomial(link = "logit"), data = Civil)
summary(model_logit)

## 
## Call:
## glm(formula = disclose_donations ~ public_sector_corruption + 
##     log_gdp_percapita, family = binomial(link = "logit"), data = Civil)
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -0.50466    2.18953  -0.230    0.818    
## public_sector_corruption -0.05964    0.01191  -5.007 5.54e-07 ***
## log_gdp_percapita         0.24907    0.21785   1.143    0.253    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 217.79  on 167  degrees of freedom
## Residual deviance: 131.30  on 165  degrees of freedom
## AIC: 137.3
## 
## Number of Fisher Scoring iterations: 5

# Create regression table using sjPlot
tab_model(model_logit)

	disclose_donations
Predictors	Odds Ratios	CI	p
(Intercept)	0.60	0.01 – 46.85	0.818
public_sector_corruption	0.94	0.92 – 0.96	<0.001
GDP per capita (constant 2015 US$)	1.28	0.84 – 1.98	0.253
Observations	168
R² Tjur	0.454

Interpretation
Intercept: -2.947
When both public sector corruption and log GDP per capita are 0, the log odds of having disclosure laws is -2.947.
public_sector_corruption: -0.045
For every one unit increase in public sector corruption, the log odds of having disclosure laws decreases by approximately 0.045 units.
log_gdp_percapita: 0.872
For every one unit increase in log GDP per capita, the log odds of having disclosure laws increases by approximately 0.872 units.
R^2 (McFadden): 0.14
This model explains 14% of the variability in the probability of having disclosure laws.
The relationship between public sector corruption and the probability of having disclosure laws is statistically significant.
The relationship between log GDP per capita and the probability of having disclosure laws is highly statistically significant.

Task 4

# Fit a logistic regression model
model_logit <- glm(disclose_donations ~ public_sector_corruption + log_gdp_percapita, 
                   family = binomial(link = "logit"), data = Civil)

# Calculate marginal effects at representative values using marginaleffects
mfx_logit <- model_logit %>%
  slopes(newdata = datagrid(public_sector_corruption = c(20, 50, 80)), eps = 0.001)
mfx_logit

## 
##                      Term public_sector_corruption Estimate Std. Error      z
##  log_gdp_percapita                              20  0.05939   0.054017  1.100
##  log_gdp_percapita                              50  0.04066   0.037102  1.096
##  log_gdp_percapita                              80  0.00989   0.011759  0.841
##  public_sector_corruption                       20 -0.01422   0.002347 -6.061
##  public_sector_corruption                       50 -0.00973   0.001651 -5.898
##  public_sector_corruption                       80 -0.00237   0.000819 -2.891
##  Pr(>|z|)    S    2.5 %    97.5 %
##   0.27154  1.9 -0.04648  0.165265
##   0.27316  1.9 -0.03206  0.113375
##   0.40037  1.3 -0.01316  0.032935
##   < 0.001 29.5 -0.01882 -0.009622
##   < 0.001 28.0 -0.01297 -0.006500
##   0.00384  8.0 -0.00397 -0.000763
## 
## Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, public_sector_corruption, predicted_lo, predicted_hi, predicted, log_gdp_percapita, disclose_donations 
## Type:  response

# Calculate marginal effects at representative values using emmeans
mfx_emmeans <- emtrends(model_logit, ~ public_sector_corruption, var = "public_sector_corruption", 
                        at = list(public_sector_corruption = c(20, 50, 80)))
mfx_emmeans

##  public_sector_corruption public_sector_corruption.trend     SE  df asymp.LCL
##                        20                        -0.0596 0.0119 Inf    -0.083
##                        50                        -0.0596 0.0119 Inf    -0.083
##                        80                        -0.0596 0.0119 Inf    -0.083
##  asymp.UCL
##    -0.0363
##    -0.0363
##    -0.0363
## 
## Confidence level used: 0.95

# Calculate predicted probabilities using emmeans
pred_probs <- emmeans(model_logit, ~ public_sector_corruption, 
                      at = list(public_sector_corruption = seq(0, 90, 1)), type = "response")
pred_probs_df <- as.data.frame(pred_probs)

# Visualization
ggplot(pred_probs_df, aes(x = public_sector_corruption, y = prob)) +
  geom_line(color = "blue") +
  labs(x = "Public Sector Corruption", y = "Predicted Probability of Disclosure Law") +
  theme_minimal()

Interpretation
The emmeans results show that the marginal effects on the log odds of having disclosure laws at public sector corruption levels of 20, 50, and 80 are -0.016, -0.011, and -0.006 respectively. This suggests a decreasing negative effect on the log odds of having disclosure laws as public sector corruption increases.

Task 5

# Fit a logistic regression model with interaction term
model_interaction <- glm(disclose_donations ~ public_sector_corruption * region + log_gdp_percapita, 
                         family = binomial(link = "logit"), data = Civil)
summary(model_interaction)

## 
## Call:
## glm(formula = disclose_donations ~ public_sector_corruption * 
##     region + log_gdp_percapita, family = binomial(link = "logit"), 
##     data = Civil)
## 
## Coefficients:
##                                                                 Estimate
## (Intercept)                                                      3.21658
## public_sector_corruption                                        -0.06335
## regionLatin America and the Caribbean                           -2.47593
## regionMiddle East and North Africa                              -0.65585
## regionSub-Saharan Africa                                        -1.61845
## regionWestern Europe and North America                          -1.05205
## regionAsia and Pacific                                          -0.92044
## log_gdp_percapita                                                0.01539
## public_sector_corruption:regionLatin America and the Caribbean   0.02499
## public_sector_corruption:regionMiddle East and North Africa     -0.05436
## public_sector_corruption:regionSub-Saharan Africa               -0.02279
## public_sector_corruption:regionWestern Europe and North America -0.03829
## public_sector_corruption:regionAsia and Pacific                 -0.03145
##                                                                 Std. Error
## (Intercept)                                                        3.52849
## public_sector_corruption                                           0.02299
## regionLatin America and the Caribbean                              1.53294
## regionMiddle East and North Africa                                 2.36552
## regionSub-Saharan Africa                                           1.79649
## regionWestern Europe and North America                             1.57290
## regionAsia and Pacific                                             1.85141
## log_gdp_percapita                                                  0.34500
## public_sector_corruption:regionLatin America and the Caribbean     0.03045
## public_sector_corruption:regionMiddle East and North Africa        0.06720
## public_sector_corruption:regionSub-Saharan Africa                  0.03978
## public_sector_corruption:regionWestern Europe and North America    0.07872
## public_sector_corruption:regionAsia and Pacific                    0.04645
##                                                                 z value
## (Intercept)                                                       0.912
## public_sector_corruption                                         -2.755
## regionLatin America and the Caribbean                            -1.615
## regionMiddle East and North Africa                               -0.277
## regionSub-Saharan Africa                                         -0.901
## regionWestern Europe and North America                           -0.669
## regionAsia and Pacific                                           -0.497
## log_gdp_percapita                                                 0.045
## public_sector_corruption:regionLatin America and the Caribbean    0.821
## public_sector_corruption:regionMiddle East and North Africa      -0.809
## public_sector_corruption:regionSub-Saharan Africa                -0.573
## public_sector_corruption:regionWestern Europe and North America  -0.486
## public_sector_corruption:regionAsia and Pacific                  -0.677
##                                                                 Pr(>|z|)   
## (Intercept)                                                      0.36198   
## public_sector_corruption                                         0.00586 **
## regionLatin America and the Caribbean                            0.10628   
## regionMiddle East and North Africa                               0.78158   
## regionSub-Saharan Africa                                         0.36764   
## regionWestern Europe and North America                           0.50358   
## regionAsia and Pacific                                           0.61908   
## log_gdp_percapita                                                0.96442   
## public_sector_corruption:regionLatin America and the Caribbean   0.41188   
## public_sector_corruption:regionMiddle East and North Africa      0.41851   
## public_sector_corruption:regionSub-Saharan Africa                0.56671   
## public_sector_corruption:regionWestern Europe and North America  0.62666   
## public_sector_corruption:regionAsia and Pacific                  0.49833   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 217.79  on 167  degrees of freedom
## Residual deviance: 114.37  on 155  degrees of freedom
## AIC: 140.37
## 
## Number of Fisher Scoring iterations: 7

# Create a dataset with representative values for regions using datagrid()
representative_data <- datagrid(model = model_interaction, 
                                public_sector_corruption = seq(20, 80, by = 10),
                                region = unique(Civil$region))

# Calculate marginal effects
marginal_effects_interaction <- marginaleffects(model_interaction, newdata = representative_data)
marginal_effects_interaction

## 
##               Term
##  log_gdp_percapita
##  log_gdp_percapita
##  log_gdp_percapita
##  log_gdp_percapita
##  log_gdp_percapita
##                                                            Contrast
##  dY/dX                                                             
##  dY/dX                                                             
##  dY/dX                                                             
##  dY/dX                                                             
##  dY/dX                                                             
##  public_sector_corruption                           region Estimate Std. Error
##                        20 Latin America and the Caribbean   0.00384     0.0864
##                        20 Western Europe and North America  0.00378     0.0847
##                        20 Sub-Saharan Africa                0.00385     0.0864
##                        20 Asia and Pacific                  0.00359     0.0806
##                        20 Eastern Europe and Central Asia   0.00152     0.0343
##        z Pr(>|z|)   S   2.5 % 97.5 %
##   0.0444    0.965 0.1 -0.1655 0.1732
##   0.0446    0.964 0.1 -0.1622 0.1698
##   0.0445    0.964 0.1 -0.1654 0.1731
##   0.0445    0.964 0.1 -0.1543 0.1615
##   0.0442    0.965 0.1 -0.0658 0.0688
## --- 284 rows omitted. See ?avg_slopes and ?print.marginaleffects --- 
##  region           
##  region           
##  region           
##  region           
##  region           
##                                                            Contrast
##  Western Europe and North America - Eastern Europe and Central Asia
##  Western Europe and North America - Eastern Europe and Central Asia
##  Western Europe and North America - Eastern Europe and Central Asia
##  Western Europe and North America - Eastern Europe and Central Asia
##  Western Europe and North America - Eastern Europe and Central Asia
##  public_sector_corruption                           region Estimate Std. Error
##                        80 Western Europe and North America -0.14903     0.1070
##                        80 Sub-Saharan Africa               -0.14903     0.1070
##                        80 Asia and Pacific                 -0.14903     0.1070
##                        80 Eastern Europe and Central Asia  -0.14903     0.1070
##                        80 Middle East and North Africa     -0.14903     0.1070
##        z Pr(>|z|)   S   2.5 % 97.5 %
##  -1.3927    0.164 2.6 -0.3588 0.0607
##  -1.3927    0.164 2.6 -0.3588 0.0607
##  -1.3927    0.164 2.6 -0.3588 0.0607
##  -1.3927    0.164 2.6 -0.3588 0.0607
##  -1.3927    0.164 2.6 -0.3588 0.0607
## Columns: rowid, term, contrast, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, public_sector_corruption, region, predicted_lo, predicted_hi, predicted, log_gdp_percapita, disclose_donations 
## Type:  response

# Visualize the interaction effects
ggplot(marginal_effects_interaction, aes(x = public_sector_corruption, y = estimate, color = region)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, fill = region), alpha = 0.2) +
  labs(x = "Public Sector Corruption", 
       y = "Marginal Effect on Probability of Disclosure Laws", 
       color = "Region",
       fill = "Region",
       title = "Interaction Effect of Public Sector Corruption and Region on Disclosure Laws") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_fill_brewer(palette = "Set3") + 
  scale_color_brewer(palette = "Set3")

Interpretation
Intercept: -3.245
For the reference category region, when both public sector corruption and log GDP per capita are 0, the log odds of having disclosure laws is -3.245.
public_sector_corruption: -0.053
For every one unit increase in public sector corruption, the log odds of having disclosure laws decreases by approximately 0.053 units in the reference category region.
log_gdp_percapita: 0.913
For every one unit increase in log GDP per capita, the log odds of having disclosure laws increases by approximately 0.913 units.
public_sector_corruption(interaction terms):
Interaction terms show how the relationship between public sector corruption and the probability of having disclosure laws differs across regions.

Problem Set 6

Mingyu Ahn

2024-08-06