# Install necessary packages if not already installed
if (!require("ggplot2")) install.packages("ggplot2")
## Loading required package: ggplot2
if (!require("gridExtra")) install.packages("gridExtra")
## Loading required package: gridExtra
if (!require("stargazer")) install.packages("stargazer")
## Loading required package: stargazer
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Load necessary libraries
library(ggplot2)
library(gridExtra)
library(stargazer)
tunisia_prison_data <- data.frame(
Year = c(1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913,
1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1935,
1936, 1937, 1938, 1939, 1945, 1946, 1947, 1948, 1949, 1950,
1951, 1952, 1953, 1954, 1955, 1956),
Prison_Population = c(1.654, 1.889, 1.934, 1.747, 2.195, 2.717, 2.750,
2.737, 2.585, 2.510, 3.238, 3.104, 3.200, 3.021,
2.600, 2.479, 2.933, 2.816, 2.683, 3.020, 3.198,
3.251, 3.656, 3.472, 5.622, 4.526, 4.829, 4.672,
4.279, 3.275, 3.358, 4.810, 5.135, 3.999, 3.642,
3.803)
)
linear_model <- lm(Prison_Population ~ Year, data = tunisia_prison_data)
poly_model <- lm(Prison_Population ~ poly(Year, 2), data = tunisia_prison_data)
plot_poly <- ggplot(tunisia_prison_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
labs(title = "Carceral Population (Polynomial Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
plot_linear <- ggplot(tunisia_prison_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
labs(title = "Carceral Population (Linear Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
grid.arrange(plot_poly, plot_linear, ncol = 2)
linear_residuals <- resid(linear_model)
poly_residuals <- resid(poly_model)
residuals_linear_plot <- ggplot(tunisia_prison_data, aes(x = Year, y = linear_residuals)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
labs(title = "Residuals of Linear Model", x = "Year", y = "Residuals") +
theme_minimal()
residuals_poly_plot <- ggplot(tunisia_prison_data, aes(x = Year, y = poly_residuals)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
labs(title = "Residuals of Polynomial Model", x = "Year", y = "Residuals") +
theme_minimal()
grid.arrange(residuals_linear_plot, residuals_poly_plot, ncol = 2)
print(summary(linear_model))
##
## Call:
## lm(formula = Prison_Population ~ Year, data = tunisia_prison_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84521 -0.37233 -0.09943 0.32462 1.73549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -87.02211 10.91461 -7.973 2.73e-09 ***
## Year 0.04674 0.00565 8.272 1.18e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5686 on 34 degrees of freedom
## Multiple R-squared: 0.668, Adjusted R-squared: 0.6583
## F-statistic: 68.42 on 1 and 34 DF, p-value: 1.183e-09
print(summary(poly_model))
##
## Call:
## lm(formula = Prison_Population ~ poly(Year, 2), data = tunisia_prison_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85967 -0.37501 -0.07558 0.34501 1.74105
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.25942 0.09608 33.926 < 2e-16 ***
## poly(Year, 2)1 4.70303 0.57645 8.159 2.03e-09 ***
## poly(Year, 2)2 0.15717 0.57645 0.273 0.787
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5765 on 33 degrees of freedom
## Multiple R-squared: 0.6688, Adjusted R-squared: 0.6487
## F-statistic: 33.32 on 2 and 33 DF, p-value: 1.207e-08
stargazer(linear_model, poly_model, type = "text", mean.sd = TRUE, min.max = TRUE, column.sep.width = "50 pt", title = "Regression Results")
##
## Regression Results
## =================================================================
## Dependent variable:
## ---------------------------------------------
## Prison_Population
## (1) (2)
## -----------------------------------------------------------------
## Year 0.047***
## (0.006)
##
## poly(Year, 2)1 4.703***
## (0.576)
##
## poly(Year, 2)2 0.157
## (0.576)
##
## Constant -87.022*** 3.259***
## (10.915) (0.096)
##
## -----------------------------------------------------------------
## Observations 36 36
## R2 0.668 0.669
## Adjusted R2 0.658 0.649
## Residual Std. Error 0.569 (df = 34) 0.576 (df = 33)
## F Statistic 68.425*** (df = 1; 34) 33.318*** (df = 2; 33)
## =================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
year_2021 <- data.frame(Year = 1996)
# Predict using the linear model
pred_linear_2021 <- predict(linear_model, newdata = year_2021)
# Predict using the polynomial model
pred_poly_2021 <- predict(poly_model, newdata = year_2021)
cat("Estimated Carceral Population in 2021 (Linear Model):", pred_linear_2021, "\n")
## Estimated Carceral Population in 2021 (Linear Model): 6.270229
cat("Estimated Carceral Population in 2021 (Polynomial Model):", pred_poly_2021, "\n")
## Estimated Carceral Population in 2021 (Polynomial Model): 6.732083
year_1996 <- data.frame(Year = 1996)
# Predict using the linear model
pred_linear_1996 <- predict(linear_model, newdata = year_1996)
# Predict using the polynomial model
pred_poly_1996 <- predict(poly_model, newdata = year_1996)
cat("Estimated Carceral Population in 1996 (Linear Model):", pred_linear_1996, "\n")
## Estimated Carceral Population in 1996 (Linear Model): 6.270229
cat("Estimated Carceral Population in 1996 (Polynomial Model):", pred_poly_1996, "\n")
## Estimated Carceral Population in 1996 (Polynomial Model): 6.732083
# Update the data frame to include the actual value for 2021
tunisia_prison_data_extended <- rbind(tunisia_prison_data, data.frame(Year = 2021, Prison_Population = 23.607))
# Predict the values for the extended data frame
tunisia_prison_data_extended$Predicted_Linear <- predict(linear_model, newdata = tunisia_prison_data_extended)
tunisia_prison_data_extended$Predicted_Poly <- predict(poly_model, newdata = tunisia_prison_data_extended)
# Plot the regression lines including the actual 2021 value
plot_linear_extended <- ggplot(tunisia_prison_data_extended, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
geom_point(aes(x = 2021, y = 23.607), color = "blue", size = 3, shape = 17) +
geom_point(aes(x = 2021, y = 23.607), color = "blue", size = 3, shape = 17) +
labs(title = "Carceral Population with 2021 Value",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
plot_poly_extended <- ggplot(tunisia_prison_data_extended, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
geom_point(aes(x = 2021, y = 23.607), color = "red", size = 3, shape = 17) +
labs(title = "Carceral Population with 2021 Value",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
grid.arrange(plot_linear_extended, plot_poly_extended, ncol = 2)
## Warning in geom_point(aes(x = 2021, y = 23.607), color = "blue", size = 3, : All aesthetics have length 1, but the data has 37 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## All aesthetics have length 1, but the data has 37 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
## Warning in geom_point(aes(x = 2021, y = 23.607), color = "red", size = 3, : All aesthetics have length 1, but the data has 37 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
# Additional data for the years since Ben Ali
ben_ali_data <- data.frame(
Year = c(1996, 2004, 2009, 2011, 2013, 2015, 2017, 2020, 2021),
Prison_Population = c(23.165, 26.000, 26.319, 21.000, 25.000, 23.000, 20.755, 22.964, 23.607)
)
# Combine the new data with the existing data
combined_data <- rbind(tunisia_prison_data, ben_ali_data)
# Fit the regression models for the combined data
benali_linear_model <- lm(Prison_Population ~ Year, data = ben_ali_data)
benali_poly_model <- lm(Prison_Population ~ poly(Year, 2), data = ben_ali_data)
# Plot the combined data with regression lines
plot_benali_linear <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
labs(title = "Carceral Population Over Time in Tunisia (Combined Linear Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
plot_benali_poly <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
labs(title = "Carceral Population Over Time in Tunisia (Combined Polynomial Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
grid.arrange(plot_benali_linear, plot_benali_poly, ncol = 2)
#Comparison of Colonial and Post-Colonial Periods
# Separate the data into colonial and post-colonial periods
colonial_data <- subset(tunisia_prison_data, Year <= 1956)
benali_data <- subset(combined_data, Year > 1956)
# Fit polynomial regression models for both periods
colonial_poly_model <- lm(Prison_Population ~ poly(Year, 2), data = colonial_data)
benali_poly_model <- lm(Prison_Population ~ poly(Year, 2), data = ben_ali_data)
# Plot the colonial period data with polynomial line
plot_colonial_poly <- ggplot(colonial_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
labs(title = "Colonial Carceral Population (Poly Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
# Plot the post-colonial period data with polynomial line
plot_post_colonial_poly <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
labs(title = "Carceral Population Since Ben Ali (Poly Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
# Print polynomial model summaries for both periods
stargazer(colonial_poly_model,benali_poly_model, type = "text" )
##
## ============================================================
## Dependent variable:
## ----------------------------------------
## Prison_Population
## (1) (2)
## ------------------------------------------------------------
## poly(Year, 2)1 4.703*** -1.665
## (0.576) (2.091)
##
## poly(Year, 2)2 0.157 -1.368
## (0.576) (2.091)
##
## Constant 3.259*** 23.534***
## (0.096) (0.697)
##
## ------------------------------------------------------------
## Observations 36 9
## R2 0.669 0.150
## Adjusted R2 0.649 -0.133
## Residual Std. Error 0.576 (df = 33) 2.091 (df = 6)
## F Statistic 33.318*** (df = 2; 33) 0.531 (df = 2; 6)
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
grid.arrange(plot_colonial_poly, plot_benali_poly, ncol = 2)
#Comparison of Colonial and Post-Colonial Periods (Linear models)
# Fit linear regression models for both periods
colonial_linear_model <- lm(Prison_Population ~ Year, data = colonial_data)
benali_linear_model <- lm(Prison_Population ~ Year, data = ben_ali_data)
# Plot the colonial period data with regression line
plot_colonial_linear <- ggplot(colonial_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
labs(title = "Colonial Carceral Population ( Linear Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
# Plot the post-colonial period data with regression line
plot_post_colonial_linear <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
labs(title = "Carceral Population Since Ben Ali (Linear Reg)",
x = "Year",
y = "Carceral Population (in thousands)") +
theme_minimal()
# Print linear model summaries for both periods
stargazer(colonial_linear_model,benali_linear_model, type = "text" )
##
## ============================================================
## Dependent variable:
## ----------------------------------------
## Prison_Population
## (1) (2)
## ------------------------------------------------------------
## Year 0.047*** -0.074
## (0.006) (0.089)
##
## Constant -87.022*** 171.897
## (10.915) (178.597)
##
## ------------------------------------------------------------
## Observations 36 9
## R2 0.668 0.090
## Adjusted R2 0.658 -0.040
## Residual Std. Error 0.569 (df = 34) 2.004 (df = 7)
## F Statistic 68.425*** (df = 1; 34) 0.690 (df = 1; 7)
## ============================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
grid.arrange(plot_colonial_linear, plot_benali_linear, ncol = 2)
# Create a combined data frame including predictions for a counterfactual scenario
# Assume colonial practices continued after 1956
# Generate counterfactual data based on colonial model
counterfactual_years <- data.frame(Year = seq(1957, 2021, by = 1))
counterfactual_predictions <- predict(colonial_linear_model, newdata = counterfactual_years)
counterfactual_data <- data.frame(Year = counterfactual_years$Year, Prison_Population = counterfactual_predictions)
# Define the eras
eras <- data.frame(
Era = c("Colonial", "Bourguiba", "Ben Ali", "Democracy"),
Start = c(1904, 1957, 1987, 2011),
End = c(1956, 1986, 2010, 2021)
)
# Plot the actual data and the counterfactual scenario
plot_counterfactual <- ggplot() +
# Add vertical zones for different eras
geom_rect(data = eras, aes(xmin = Start, xmax = End, ymin = -Inf, ymax = Inf, fill = Era), alpha = 0.15) +
# Plot colonial data points
geom_point(data = colonial_data, aes(x = Year, y = Prison_Population), color = "black", size = 1) +
# Plot post-colonial data points
geom_point(data = ben_ali_data, aes(x = Year, y = Prison_Population), color = "black", size = 1) +
# Add linear regression lines for colonial and post-colonial data
geom_smooth(data = colonial_data, aes(x = Year, y = Prison_Population), method = "lm", se = TRUE, color = "blue", linetype = "dashed") +
geom_smooth(data = ben_ali_data, aes(x = Year, y = Prison_Population), method = "lm", se = TRUE, color = "red", linetype = "solid") +
# Add counterfactual scenario line
geom_line(data = counterfactual_data, aes(x = Year, y = Prison_Population), color = "darkgreen", linetype = 4) +
# Customize plot labels and theme
labs(title = "Prison Population: Colonial vs. Post-Colonial Practices",
x = "Year",
y = "Prison Population (in thousands)") +
theme_minimal() +
scale_fill_manual(values = c("Colonial" = "grey", "Bourguiba" = "lightcoral", "Ben Ali" = "lightblue", "Democracy" = "lightgreen")) +
guides(fill = guide_legend(title = "Era"))
plot_counterfactual
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#Average Annual Increase in Carceral Capacity during Different Regimes
# Separate the colonial period into non-WWII and WWII periods
colonial_non_wwii_data <- subset(colonial_data, Year < 1939 | Year > 1945)
colonial_wwii_data <- subset(colonial_data, Year >= 1939 & Year <= 1945)
calculate_annual_increase <- function(data) {
data <- data[order(data$Year), ]
years <- diff(data$Year)
populations <- diff(data$Prison_Population)
annual_increase <- populations / years
annual_increase_percent <- (annual_increase / head(data$Prison_Population, -1)) * 100
avg_annual_increase <- mean(annual_increase)
avg_annual_increase_percent <- mean(annual_increase_percent)
list(
avg_annual_increase = avg_annual_increase,
avg_annual_increase_percent = avg_annual_increase_percent
)
}
# Calculate for each period
colonial_non_wwii_increase <- calculate_annual_increase(colonial_non_wwii_data)
colonial_wwii_increase <- calculate_annual_increase(colonial_wwii_data)
colonial_increase <- calculate_annual_increase(colonial_data)
benali_increase <- calculate_annual_increase(subset(ben_ali_data, Year <= 2010))
post_benali_increase <- calculate_annual_increase(subset(ben_ali_data, Year > 2010))
# Create a summary data frame
increase_summary <- data.frame(
Period = factor(c("Colonial (non-WWII)", "WWII", "Ben Ali", "Democracy"),
levels = c("Colonial (non-WWII)", "WWII", "Ben Ali", "Democracy")),
Avg_Annual_Increase = c(colonial_non_wwii_increase$avg_annual_increase,
colonial_wwii_increase$avg_annual_increase,
benali_increase$avg_annual_increase,
post_benali_increase$avg_annual_increase),
Avg_Annual_Increase_Percent = c(colonial_non_wwii_increase$avg_annual_increase_percent,
colonial_wwii_increase$avg_annual_increase_percent,
benali_increase$avg_annual_increase_percent,
post_benali_increase$avg_annual_increase_percent)
)
# Colors matching the previous chart
period_colors <- c("Colonial (non-WWII)" = "grey", "WWII" = "darkgrey", "Ben Ali" = "lightblue", "Democracy" = "lightgreen")
# Plot the average annual increase in actual numbers
plot_avg_increase <- ggplot(increase_summary, aes(x = Period, y = Avg_Annual_Increase*1000, fill = Period)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = period_colors) +
labs(title = "Evolution of Carceral Population",
x = "Period",
y = "Average Annual Increase (Total Number)") +
theme_minimal() +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Plot the average annual increase in percentages
plot_avg_increase_percent <- ggplot(increase_summary, aes(x = Period, y = Avg_Annual_Increase_Percent, fill = Period)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = period_colors) +
labs(title = "Evolution of Carceral Population",
x = "Period",
y = "Average Annual Increase (%)") +
theme_minimal() +
theme(legend.position = "none") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
grid.arrange(plot_avg_increase, plot_avg_increase_percent, ncol = 2)