# Restore the objects saved in Civil.RData into the workspace.
# NOTE(review): `corruption` is presumably one of the objects restored by this
# call — confirm against the contents of the .RData file.
load("Civil.RData")
# Bind the data to the name `Civil`, which the rest of the analysis uses.
Civil <- corruption

Task 1

# Print the structure of the data: dimensions, column names, types, first values
str(Civil)

## 'data.frame':    168 obs. of  17 variables:
##  $ country_name            : chr  "Mexico" "Suriname" "Sweden" "Switzerland" ...
##  $ country_text_id         : chr  "MEX" "SUR" "SWE" "CHE" ...
##  $ year                    : num  2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...
##  $ region                  : Factor w/ 6 levels "Eastern Europe and Central Asia",..: 2 2 5 5 4 4 6 6 1 1 ...
##  $ disclose_donations_ord  : num  3 1 2 0 2 1 3 2 3 2 ...
##  $ public_sector_corruption: num  48.8 24.8 1.3 1.4 65.2 57.1 3.7 36.8 70.6 71.2 ...
##  $ polyarchy               : num  64.7 76.1 90.8 89.4 72 70.3 83.2 43.6 26.2 48.5 ...
##  $ civil_liberties         : num  71.2 87.7 96.9 94.8 90.4 82.2 92.8 56.9 43 85.5 ...
##  $ disclose_donations      : logi  TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ iso2c                   : chr  "MX" "SR" "SE" "CH" ...
##  $ population              : num  1.29e+08 5.87e+05 1.04e+07 8.64e+06 3.11e+07 ...
##   ..- attr(*, "label")= chr "Population, total"
##  $ gdp_percapita           : num  8923 7530 51542 85685 2021 ...
##   ..- attr(*, "label")= chr "GDP per capita (constant 2015 US$)"
##  $ capital                 : chr  "Mexico City" "Paramaribo" "Stockholm" "Bern" ...
##  $ longitude               : chr  "-99.1276" "-55.1679" "18.0645" "7.44821" ...
##  $ latitude                : chr  "19.427" "5.8232" "59.3327" "46.948" ...
##  $ income                  : chr  "Upper middle income" "Upper middle income" "High income" "High income" ...
##  $ log_gdp_percapita       : num  9.1 8.93 10.85 11.36 7.61 ...
##   ..- attr(*, "label")= chr "GDP per capita (constant 2015 US$)"

# Bivariate OLS: public sector corruption regressed on polyarchy.
# The call is kept in this exact form because summary() echoes it verbatim.
model <- lm(public_sector_corruption ~ polyarchy, data = Civil)

# Scatter plot of the raw data with the fitted straight line drawn on top
ggplot(data = Civil,
       mapping = aes(x = polyarchy, y = public_sector_corruption)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue") +
  ggtitle("Relationship between Polyarchy and Public Sector Corruption") +
  xlab("Polyarchy") +
  ylab("Public Sector Corruption")

# Regression table for the bivariate model: standard errors, t statistics and
# p-values are shown; confidence intervals are switched off.
tab_model(model,
          show.ci = FALSE,  # Hide confidence intervals
          show.se = TRUE,   # Show standard errors
          show.stat = TRUE, # Show statistics (t-value)
          show.p = TRUE,    # Show p-values
          title = "Regression Analysis of Public Sector Corruption",
          # One label per model term, in coefficient order (intercept, slope)
          pred.labels = c("Intercept", "Polyarchy"),
          string.se = "Std. Error",
          string.stat = "t value",
          string.p = "p value")

Regression Analysis of Public Sector Corruption
	public_sector_corruption
Predictors	Estimates	Std. Error	t value	p value
(Intercept)	89.44	3.95	22.62	<0.001
polyarchy	-0.83	0.07	-12.18	<0.001
Observations	168
R² / R² adjusted	0.472 / 0.469

# Print the residual summary, coefficient table, and fit statistics of `model`
# for interpretation
summary(model)

## 
## Call:
## lm(formula = public_sector_corruption ~ polyarchy, data = Civil)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -69.498 -14.334   1.448  16.985  44.436 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 89.44444    3.95373   22.62   <2e-16 ***
## polyarchy   -0.82641    0.06786  -12.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.68 on 166 degrees of freedom
## Multiple R-squared:  0.4718, Adjusted R-squared:  0.4686 
## F-statistic: 148.3 on 1 and 166 DF,  p-value: < 2.2e-16

Interpretation

The scatter plot and regression table demonstrate a significant relationship between Polyarchy and Public Sector Corruption. As Polyarchy increases by 1 unit, Public Sector Corruption decreases by 0.83 units on average, with this predictive relationship being significant at the 95% confidence level (p value is <0.001). The y-intercept is 89.44, demonstrating that when Polyarchy is zero, on average Public Sector Corruption scores at 89.44.

Task 2

# Add quadratic term to the dataset
# NOTE(review): polyarchy2 is stored as its own column, so the model below sees
# it as a separate predictor rather than as a function of polyarchy.
Civil <- Civil %>%
  mutate(polyarchy2 = polyarchy^2)

# Perform the polynomial regression (linear + squared polyarchy terms)
model_poly <- lm(public_sector_corruption ~ polyarchy + polyarchy2, data = Civil)

# Print model summary for interpretation
summary(model_poly)

## 
## Call:
## lm(formula = public_sector_corruption ~ polyarchy + polyarchy2, 
##     data = Civil)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.462  -7.475   1.418  14.107  35.187 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 53.116104   7.019792   7.567 2.55e-12 ***
## polyarchy    0.974653   0.305335   3.192  0.00169 ** 
## polyarchy2  -0.017310   0.002874  -6.023 1.08e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.69 on 165 degrees of freedom
## Multiple R-squared:  0.567,  Adjusted R-squared:  0.5618 
## F-statistic:   108 on 2 and 165 DF,  p-value: < 2.2e-16

# Scatter plot of the data with a second-degree polynomial fit overlaid
ggplot(data = Civil,
       mapping = aes(x = polyarchy, y = public_sector_corruption)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), color = "blue") +
  ggtitle("Polynomial Relationship between Polyarchy and Public Sector Corruption") +
  xlab("Polyarchy") +
  ylab("Public Sector Corruption")

Manual

# Pull the linear and squared polyarchy coefficients out of the fitted model
polarch_1 <- coef(model_poly)["polyarchy"]
polarch_2 <- coef(model_poly)["polyarchy2"]

# Slope of the fitted quadratic at x:
# d/dx (b1 * x + b2 * x^2) = b1 + 2 * b2 * x
Polyarchy_slope <- function(x) {
  polarch_1 + 2 * polarch_2 * x
}

# Evaluate the slope at polyarchy = 30, 60 and 90
Polyarchy_slope(c(30, 60, 90))

## [1] -0.06392759 -1.10250800 -2.14108840

Marginal Effects Package

# Marginal effect of polyarchy at 30, 60 and 90.
# The quadratic term is written inline as I(polyarchy^2) so that slopes()
# differentiates the whole polynomial with respect to polyarchy. With the
# squared term stored as a separate column (polyarchy2), slopes() treats it as
# an unrelated predictor and returns the raw coefficients at every level.
lm(public_sector_corruption ~ polyarchy + I(polyarchy^2), data = Civil) %>%
  slopes(newdata = datagrid(polyarchy = c(30, 60, 90)), eps = 0.001)

## 
##        Term polyarchy Estimate Std. Error     z Pr(>|z|)    S   2.5 %  97.5 %
##  polyarchy         30   0.9747    0.30504  3.20  0.00140  9.5  0.3768  1.5725
##  polyarchy         60   0.9747    0.30552  3.19  0.00142  9.5  0.3759  1.5735
##  polyarchy         90   0.9747    0.30534  3.19  0.00141  9.5  0.3762  1.5731
##  polyarchy2        30  -0.0173    0.00289 -5.99  < 0.001 28.8 -0.0230 -0.0116
##  polyarchy2        60  -0.0173    0.00293 -5.91  < 0.001 28.1 -0.0231 -0.0116
##  polyarchy2        90  -0.0173    0.00285 -6.07  < 0.001 29.6 -0.0229 -0.0117
## 
## Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, polyarchy, predicted_lo, predicted_hi, predicted, polyarchy2, public_sector_corruption 
## Type:  response

Interpretation:

At a polyarchy value of 30, the marginal effect is -0.064. This means that around this level of polyarchy, increasing polyarchy slightly is associated with a decrease in public sector corruption.

At a polyarchy value of 60, the marginal effect is -1.103. Here, an increase in polyarchy is associated with a decrease in public sector corruption.

At a polyarchy value of 90, the marginal effect is -2.141, indicating a stronger negative relationship at higher levels of polyarchy.

Task 3

# Re-bind Civil to the original `corruption` data; this discards the polyarchy2
# column that was added to Civil earlier in the script.
Civil <- corruption

# Fit the logistic regression model: disclosure-law indicator regressed on
# public sector corruption and log GDP per capita
model_logistic <- glm(disclose_donations ~ public_sector_corruption + log_gdp_percapita, 
                      data = Civil, 
                      family = binomial)

# Print the coefficient table (on the log-odds scale) and deviance statistics
summary(model_logistic)

## 
## Call:
## glm(formula = disclose_donations ~ public_sector_corruption + 
##     log_gdp_percapita, family = binomial, data = Civil)
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -0.50466    2.18953  -0.230    0.818    
## public_sector_corruption -0.05964    0.01191  -5.007 5.54e-07 ***
## log_gdp_percapita         0.24907    0.21785   1.143    0.253    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 217.79  on 167  degrees of freedom
## Residual deviance: 131.30  on 165  degrees of freedom
## AIC: 137.3
## 
## Number of Fisher Scoring iterations: 5

# Create a regression table using sjPlot.
# For this binomial model the table reports exponentiated coefficients (odds
# ratios), not the log-odds printed by summary(), so the estimate column is
# labelled accordingly.
tab_model(model_logistic,
          show.ci = FALSE,  # Hide confidence intervals
          show.se = TRUE,   # Show standard errors
          show.stat = TRUE, # Show statistics (e.g., z-values)
          show.p = TRUE,    # Show p-values
          title = "Logistic Regression Analysis of Campaign Finance Disclosure Laws",
          pred.labels = c("Intercept", "Public Sector Corruption", "Log GDP per Capita"),
          string.est = "Odds Ratio",
          string.se = "Std. Error",
          string.stat = "z value",
          string.p = "p value")

Logistic Regression Analysis of Campaign Finance Disclosure Laws
	disclose_donations
Predictors	Estimate	Std. Error	z value	p value
Intercept	0.60	1.32	-0.23	0.818
Public Sector Corruption	0.94	0.01	-5.01	<0.001
Log GDP per Capita	1.28	0.28	1.14	0.253
Observations	168
R² Tjur	0.454

Interpretation

The logistic regression model reveals that increased public sector corruption significantly decreases the likelihood of having campaign finance disclosure laws: each one-unit increase in public sector corruption lowers the log-odds of having disclosure laws by about 0.06, which multiplies the odds by 0.94 (p < 0.001). In contrast, log GDP per capita does not significantly affect the likelihood of having disclosure laws (p = 0.253). The intercept, representing the log-odds when both predictors are zero, is not significant. The model fit has improved compared to the null model (residual deviance of 131.30 versus a null deviance of 217.79), indicating that the predictors explain some of the variability in the outcome. Overall, only public sector corruption has a significant association with the presence of campaign finance disclosure laws.

Task 4

# Fit the logistic regression model again (if needed)
model_logistic <- glm(disclose_donations ~ log_gdp_percapita + public_sector_corruption, 
                      data = Civil, 
                      family = binomial)

# Calculate marginal effects (slopes on the response scale) at three levels of
# public sector corruption. slopes() is the current name of the deprecated
# marginaleffects() function and is what the rest of this script uses.
marginal_effects <- model_logistic %>%
  slopes(newdata = datagrid(public_sector_corruption = c(20, 50, 80)))

# Display the marginal effects at the specified values
marginal_effects

## 
##                      Term public_sector_corruption Estimate Std. Error      z
##  log_gdp_percapita                              20  0.05939   0.053994  1.100
##  log_gdp_percapita                              50  0.04066   0.037107  1.096
##  log_gdp_percapita                              80  0.00989   0.011758  0.841
##  public_sector_corruption                       20 -0.01422   0.002348 -6.057
##  public_sector_corruption                       50 -0.00973   0.001654 -5.886
##  public_sector_corruption                       80 -0.00237   0.000819 -2.891
##  Pr(>|z|)    S    2.5 %    97.5 %
##   0.27134  1.9 -0.04643  0.165221
##   0.27323  1.9 -0.03207  0.113385
##   0.40036  1.3 -0.01316  0.032935
##   < 0.001 29.4 -0.01882 -0.009620
##   < 0.001 27.9 -0.01298 -0.006493
##   0.00384  8.0 -0.00397 -0.000762
## 
## Columns: rowid, term, estimate, std.error, statistic, p.value, s.value, conf.low, conf.high, public_sector_corruption, predicted_lo, predicted_hi, predicted, log_gdp_percapita, disclose_donations 
## Type:  response

# Generate predicted probabilities using emmeans across the 0-100 range of
# public sector corruption
predicted_probabilities <- emmeans(model_logistic, ~ public_sector_corruption, 
                                   at = list(public_sector_corruption = seq(0, 100, by = 1)),
                                   type = "response")

# Convert to a data frame for plotting
predicted_df <- as.data.frame(predicted_probabilities)

# Predicted probabilities at the three corruption levels of interest.
# These come from the same prediction grid as the curve so the red points sit on
# the line; the marginal-effects table holds slopes (one row per term and
# level), not probabilities, and must not be used as y-values here.
highlight_df <- subset(predicted_df, public_sector_corruption %in% c(20, 50, 80))

# Plot the predicted probabilities using ggplot2
ggplot(predicted_df, aes(x = public_sector_corruption, y = prob)) +
  geom_line(color = "blue") +
  geom_point(data = highlight_df, color = "red") +
  labs(title = "Predicted Probabilities of Campaign Finance Disclosure Laws",
       x = "Public Sector Corruption",
       y = "Predicted Probability") +
  theme_minimal()

Task 5

# Fit the logistic regression model with interaction term:
# public_sector_corruption * region expands to both main effects plus their
# interaction, with log GDP per capita as an additional covariate
model_interaction <- glm(disclose_donations ~ public_sector_corruption * region + log_gdp_percapita, 
                         data = Civil, 
                         family = binomial)

# Summarize the model (coefficients are on the log-odds scale)
summary(model_interaction)

## 
## Call:
## glm(formula = disclose_donations ~ public_sector_corruption * 
##     region + log_gdp_percapita, family = binomial, data = Civil)
## 
## Coefficients:
##                                                                 Estimate
## (Intercept)                                                      3.21658
## public_sector_corruption                                        -0.06335
## regionLatin America and the Caribbean                           -2.47593
## regionMiddle East and North Africa                              -0.65585
## regionSub-Saharan Africa                                        -1.61845
## regionWestern Europe and North America                          -1.05205
## regionAsia and Pacific                                          -0.92044
## log_gdp_percapita                                                0.01539
## public_sector_corruption:regionLatin America and the Caribbean   0.02499
## public_sector_corruption:regionMiddle East and North Africa     -0.05436
## public_sector_corruption:regionSub-Saharan Africa               -0.02279
## public_sector_corruption:regionWestern Europe and North America -0.03829
## public_sector_corruption:regionAsia and Pacific                 -0.03145
##                                                                 Std. Error
## (Intercept)                                                        3.52849
## public_sector_corruption                                           0.02299
## regionLatin America and the Caribbean                              1.53294
## regionMiddle East and North Africa                                 2.36552
## regionSub-Saharan Africa                                           1.79649
## regionWestern Europe and North America                             1.57290
## regionAsia and Pacific                                             1.85141
## log_gdp_percapita                                                  0.34500
## public_sector_corruption:regionLatin America and the Caribbean     0.03045
## public_sector_corruption:regionMiddle East and North Africa        0.06720
## public_sector_corruption:regionSub-Saharan Africa                  0.03978
## public_sector_corruption:regionWestern Europe and North America    0.07872
## public_sector_corruption:regionAsia and Pacific                    0.04645
##                                                                 z value
## (Intercept)                                                       0.912
## public_sector_corruption                                         -2.755
## regionLatin America and the Caribbean                            -1.615
## regionMiddle East and North Africa                               -0.277
## regionSub-Saharan Africa                                         -0.901
## regionWestern Europe and North America                           -0.669
## regionAsia and Pacific                                           -0.497
## log_gdp_percapita                                                 0.045
## public_sector_corruption:regionLatin America and the Caribbean    0.821
## public_sector_corruption:regionMiddle East and North Africa      -0.809
## public_sector_corruption:regionSub-Saharan Africa                -0.573
## public_sector_corruption:regionWestern Europe and North America  -0.486
## public_sector_corruption:regionAsia and Pacific                  -0.677
##                                                                 Pr(>|z|)   
## (Intercept)                                                      0.36198   
## public_sector_corruption                                         0.00586 **
## regionLatin America and the Caribbean                            0.10628   
## regionMiddle East and North Africa                               0.78158   
## regionSub-Saharan Africa                                         0.36764   
## regionWestern Europe and North America                           0.50358   
## regionAsia and Pacific                                           0.61908   
## log_gdp_percapita                                                0.96442   
## public_sector_corruption:regionLatin America and the Caribbean   0.41188   
## public_sector_corruption:regionMiddle East and North Africa      0.41851   
## public_sector_corruption:regionSub-Saharan Africa                0.56671   
## public_sector_corruption:regionWestern Europe and North America  0.62666   
## public_sector_corruption:regionAsia and Pacific                  0.49833   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 217.79  on 167  degrees of freedom
## Residual deviance: 114.37  on 155  degrees of freedom
## AIC: 140.37
## 
## Number of Fisher Scoring iterations: 7

# Grid of corruption levels (0 to 100 in steps of 10) crossed with every region.
# NOTE(review): interaction_data is not referenced again in the visible code.
interaction_data <- datagrid(
  model = model_interaction,
  public_sector_corruption = seq(0, 100, by = 10),
  region = unique(Civil$region)
)

# Predicted probabilities of disclosure laws by corruption level within each
# region, back-transformed to the response scale
predicted_probs_interaction <- emmeans(
  model_interaction,
  ~ public_sector_corruption | region,
  at = list(public_sector_corruption = seq(0, 100, by = 10)),
  type = "response"
)

# Data-frame version of the emmeans result for ggplot2
predicted_interaction_df <- as.data.frame(predicted_probs_interaction)

# One predicted-probability line per region
ggplot(data = predicted_interaction_df,
       mapping = aes(x = public_sector_corruption, y = prob, color = region)) +
  geom_line() +
  ggtitle("Interaction Effects of Public Sector Corruption and Region on Disclosure Laws") +
  xlab("Public Sector Corruption") +
  ylab("Predicted Probability of Disclosure Laws") +
  labs(color = "Region") +
  theme_minimal()

Interpret the results and discuss the implications of the interaction effect. # Interpretation

This visual shows that, across all regions in the region variable, the predicted probability of disclosure laws decreases as public sector corruption increases. The regions of the Middle East and North Africa, Sub-Saharan Africa, Western Europe and North America, and Asia and the Pacific all follow similar paths, with a strong negative correlation between probability of disclosure laws and public sector corruption between the units of 0 and 37.5 of public sector corruption, after which the negative correlation (slope) weakens. Eastern Europe and Central Asia has a weaker negative correlation between 0 and 25 units of public sector corruption, then the negative correlation strengthens. Latin America and the Caribbean has a relatively linear negative correlation between 0 and 100 units of public sector corruption. The interaction effect demonstrates varying regional relationships between probability of disclosure laws and public sector corruption, implying a higher probability of disclosure laws at a given level of public sector corruption in Eastern Europe and Central Asia, and Latin America and the Caribbean, compared to other regions.

Problem Set 6

Benny Konishi

2024-08-07

Task 1

Interpretation

Task 2

Manual

Marginal Effects Package

Interpretation:

Task 3

Interpretation

Task 4

Task 5