Install and Load Necessary Packages

# Install necessary packages if not already installed
if (!require("ggplot2")) install.packages("ggplot2")
## Loading required package: ggplot2
if (!require("gridExtra")) install.packages("gridExtra")
## Loading required package: gridExtra
if (!require("stargazer")) install.packages("stargazer")
## Loading required package: stargazer
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Load necessary libraries
library(ggplot2)
library(gridExtra)
library(stargazer)

Create the Data Frame with the yearly carceral population under French colonial rule in Tunisia

tunisia_prison_data <- data.frame(
  Year = c(1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 
           1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1935, 
           1936, 1937, 1938, 1939, 1945, 1946, 1947, 1948, 1949, 1950, 
           1951, 1952, 1953, 1954, 1955, 1956),
  Prison_Population = c(1.654, 1.889, 1.934, 1.747, 2.195, 2.717, 2.750, 
                        2.737, 2.585, 2.510, 3.238, 3.104, 3.200, 3.021, 
                        2.600, 2.479, 2.933, 2.816, 2.683, 3.020, 3.198, 
                        3.251, 3.656, 3.472, 5.622, 4.526, 4.829, 4.672, 
                        4.279, 3.275, 3.358, 4.810, 5.135, 3.999, 3.642, 
                        3.803)
)

Fit the Regression Models

linear_model <- lm(Prison_Population ~ Year, data = tunisia_prison_data)
poly_model <- lm(Prison_Population ~ poly(Year, 2), data = tunisia_prison_data)

Plot the Regression Lines

plot_poly <- ggplot(tunisia_prison_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
  labs(title = "Carceral Population (Polynomial Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

plot_linear <- ggplot(tunisia_prison_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
  labs(title = "Carceral Population (Linear Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

grid.arrange(plot_poly, plot_linear, ncol = 2)

Calculate and Plot Residuals

linear_residuals <- resid(linear_model)
poly_residuals <- resid(poly_model)

residuals_linear_plot <- ggplot(tunisia_prison_data, aes(x = Year, y = linear_residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(title = "Residuals of Linear Model", x = "Year", y = "Residuals") +
  theme_minimal()

residuals_poly_plot <- ggplot(tunisia_prison_data, aes(x = Year, y = poly_residuals)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(title = "Residuals of Polynomial Model", x = "Year", y = "Residuals") +
  theme_minimal()

grid.arrange(residuals_linear_plot, residuals_poly_plot, ncol = 2)

Model Summaries

print(summary(linear_model))
## 
## Call:
## lm(formula = Prison_Population ~ Year, data = tunisia_prison_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84521 -0.37233 -0.09943  0.32462  1.73549 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -87.02211   10.91461  -7.973 2.73e-09 ***
## Year          0.04674    0.00565   8.272 1.18e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5686 on 34 degrees of freedom
## Multiple R-squared:  0.668,  Adjusted R-squared:  0.6583 
## F-statistic: 68.42 on 1 and 34 DF,  p-value: 1.183e-09
print(summary(poly_model))
## 
## Call:
## lm(formula = Prison_Population ~ poly(Year, 2), data = tunisia_prison_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.85967 -0.37501 -0.07558  0.34501  1.74105 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.25942    0.09608  33.926  < 2e-16 ***
## poly(Year, 2)1  4.70303    0.57645   8.159 2.03e-09 ***
## poly(Year, 2)2  0.15717    0.57645   0.273    0.787    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5765 on 33 degrees of freedom
## Multiple R-squared:  0.6688, Adjusted R-squared:  0.6487 
## F-statistic: 33.32 on 2 and 33 DF,  p-value: 1.207e-08

Regression Results Table

stargazer(linear_model, poly_model, type = "text", mean.sd = TRUE, min.max = TRUE,  column.sep.width = "50 pt", title = "Regression Results")
## 
## Regression Results
## =================================================================
##                                  Dependent variable:             
##                     ---------------------------------------------
##                                   Prison_Population              
##                              (1)                    (2)          
## -----------------------------------------------------------------
## Year                       0.047***                              
##                            (0.006)                               
##                                                                  
## poly(Year, 2)1                                    4.703***       
##                                                   (0.576)        
##                                                                  
## poly(Year, 2)2                                     0.157         
##                                                   (0.576)        
##                                                                  
## Constant                  -87.022***              3.259***       
##                            (10.915)               (0.096)        
##                                                                  
## -----------------------------------------------------------------
## Observations                  36                     36          
## R2                          0.668                  0.669         
## Adjusted R2                 0.658                  0.649         
## Residual Std. Error    0.569 (df = 34)        0.576 (df = 33)    
## F Statistic         68.425*** (df = 1; 34) 33.318*** (df = 2; 33)
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01

Predict Carceral Population for 2021 according to the Colonial Pace

year_2021 <- data.frame(Year = 1996)

# Predict using the linear model
pred_linear_2021 <- predict(linear_model, newdata = year_2021)

# Predict using the polynomial model
pred_poly_2021 <- predict(poly_model, newdata = year_2021)

cat("Estimated Carceral Population in 2021 (Linear Model):", pred_linear_2021, "\n")
## Estimated Carceral Population in 2021 (Linear Model): 6.270229
cat("Estimated Carceral Population in 2021 (Polynomial Model):", pred_poly_2021, "\n")
## Estimated Carceral Population in 2021 (Polynomial Model): 6.732083

Predict Carceral Population for 1996 according to the Colonial Pace

year_1996 <- data.frame(Year = 1996)

# Predict using the linear model
pred_linear_1996 <- predict(linear_model, newdata = year_1996)

# Predict using the polynomial model
pred_poly_1996 <- predict(poly_model, newdata = year_1996)

cat("Estimated Carceral Population in 1996 (Linear Model):", pred_linear_1996, "\n")
## Estimated Carceral Population in 1996 (Linear Model): 6.270229
cat("Estimated Carceral Population in 1996 (Polynomial Model):", pred_poly_1996, "\n")
## Estimated Carceral Population in 1996 (Polynomial Model): 6.732083

Plot with Actual 2021 Value

# Update the data frame to include the actual value for 2021
tunisia_prison_data_extended <- rbind(tunisia_prison_data, data.frame(Year = 2021, Prison_Population = 23.607))

# Predict the values for the extended data frame
tunisia_prison_data_extended$Predicted_Linear <- predict(linear_model, newdata = tunisia_prison_data_extended)
tunisia_prison_data_extended$Predicted_Poly <- predict(poly_model, newdata = tunisia_prison_data_extended)

# Plot the regression lines including the actual 2021 value
plot_linear_extended <- ggplot(tunisia_prison_data_extended, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
  geom_point(aes(x = 2021, y = 23.607), color = "blue", size = 3, shape = 17) +
  geom_point(aes(x = 2021, y = 23.607), color = "blue", size = 3, shape = 17) +
  labs(title = "Carceral Population with 2021 Value",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

plot_poly_extended <- ggplot(tunisia_prison_data_extended, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
  geom_point(aes(x = 2021, y = 23.607), color = "red", size = 3, shape = 17) +
  labs(title = "Carceral Population with 2021 Value",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

grid.arrange(plot_linear_extended, plot_poly_extended, ncol = 2)
## Warning in geom_point(aes(x = 2021, y = 23.607), color = "blue", size = 3, : All aesthetics have length 1, but the data has 37 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## All aesthetics have length 1, but the data has 37 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.
## Warning in geom_point(aes(x = 2021, y = 23.607), color = "red", size = 3, : All aesthetics have length 1, but the data has 37 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

Actual Numbers Since Ben Ali

# Additional data for the years since Ben Ali
ben_ali_data <- data.frame(
  Year = c(1996, 2004, 2009, 2011, 2013, 2015, 2017, 2020, 2021),
  Prison_Population = c(23.165, 26.000, 26.319, 21.000, 25.000, 23.000, 20.755, 22.964, 23.607)
)

# Combine the new data with the existing data
combined_data <- rbind(tunisia_prison_data, ben_ali_data)

# Fit the regression models for the combined data
benali_linear_model <- lm(Prison_Population ~ Year, data = ben_ali_data)
benali_poly_model <- lm(Prison_Population ~ poly(Year, 2), data = ben_ali_data)

# Plot the combined data with regression lines
plot_benali_linear <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
  labs(title = "Carceral Population Over Time in Tunisia (Combined Linear Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

plot_benali_poly <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
  labs(title = "Carceral Population Over Time in Tunisia (Combined Polynomial Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()
grid.arrange(plot_benali_linear, plot_benali_poly, ncol = 2)

#Comparison of Colonial and Post-Colonial Periods

# Separate the data into colonial and post-colonial periods
colonial_data <- subset(tunisia_prison_data, Year <= 1956)
benali_data <- subset(combined_data, Year > 1956)

# Fit polynomial regression models for both periods
colonial_poly_model <- lm(Prison_Population ~ poly(Year, 2), data = colonial_data)
benali_poly_model <- lm(Prison_Population ~ poly(Year, 2), data = ben_ali_data)

# Plot the colonial period data with polynomial line
plot_colonial_poly <- ggplot(colonial_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
    geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
  labs(title = "Colonial Carceral Population (Poly Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

# Plot the post-colonial period data with polynomial line
plot_post_colonial_poly <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "red", formula = y ~ poly(x, 2)) +
  labs(title = "Carceral Population Since Ben Ali (Poly Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()


# Print polynomial model summaries for both periods
stargazer(colonial_poly_model,benali_poly_model, type = "text" )
## 
## ============================================================
##                               Dependent variable:           
##                     ----------------------------------------
##                                Prison_Population            
##                              (1)                  (2)       
## ------------------------------------------------------------
## poly(Year, 2)1             4.703***             -1.665      
##                            (0.576)              (2.091)     
##                                                             
## poly(Year, 2)2              0.157               -1.368      
##                            (0.576)              (2.091)     
##                                                             
## Constant                   3.259***            23.534***    
##                            (0.096)              (0.697)     
##                                                             
## ------------------------------------------------------------
## Observations                  36                   9        
## R2                          0.669                0.150      
## Adjusted R2                 0.649               -0.133      
## Residual Std. Error    0.576 (df = 33)      2.091 (df = 6)  
## F Statistic         33.318*** (df = 2; 33) 0.531 (df = 2; 6)
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01
grid.arrange(plot_colonial_poly, plot_benali_poly, ncol = 2)

#Comparison of Colonial and Post-Colonial Periods (Linear models)

# Fit linear regression models for both periods
colonial_linear_model <- lm(Prison_Population ~ Year, data = colonial_data)
benali_linear_model <- lm(Prison_Population ~ Year, data = ben_ali_data)

# Plot the colonial period data with regression line
plot_colonial_linear <- ggplot(colonial_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
  labs(title = "Colonial Carceral Population ( Linear Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

# Plot the post-colonial period data with regression line
plot_post_colonial_linear <- ggplot(ben_ali_data, aes(x = Year, y = Prison_Population)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue", formula = y ~ x, linetype = "dashed") +
  labs(title = "Carceral Population Since Ben Ali (Linear Reg)",
       x = "Year",
       y = "Carceral Population (in thousands)") +
  theme_minimal()

# Print linear model summaries for both periods
stargazer(colonial_linear_model,benali_linear_model, type = "text" )
## 
## ============================================================
##                               Dependent variable:           
##                     ----------------------------------------
##                                Prison_Population            
##                              (1)                  (2)       
## ------------------------------------------------------------
## Year                       0.047***             -0.074      
##                            (0.006)              (0.089)     
##                                                             
## Constant                  -87.022***            171.897     
##                            (10.915)            (178.597)    
##                                                             
## ------------------------------------------------------------
## Observations                  36                   9        
## R2                          0.668                0.090      
## Adjusted R2                 0.658               -0.040      
## Residual Std. Error    0.569 (df = 34)      2.004 (df = 7)  
## F Statistic         68.425*** (df = 1; 34) 0.690 (df = 1; 7)
## ============================================================
## Note:                            *p<0.1; **p<0.05; ***p<0.01
grid.arrange(plot_colonial_linear, plot_benali_linear, ncol = 2)

Visual Comparison of Colonial vs. Post-Colonial Carceral Practices

# Create a combined data frame including predictions for a counterfactual scenario
# Assume colonial practices continued after 1956 

# Generate counterfactual data based on colonial model
counterfactual_years <- data.frame(Year = seq(1957, 2021, by = 1))
counterfactual_predictions <- predict(colonial_linear_model, newdata = counterfactual_years)
counterfactual_data <- data.frame(Year = counterfactual_years$Year, Prison_Population = counterfactual_predictions)

# Define the eras
eras <- data.frame(
  Era = c("Colonial", "Bourguiba", "Ben Ali", "Democracy"),
  Start = c(1904, 1957, 1987, 2011),
  End = c(1956, 1986, 2010, 2021)
)

# Plot the actual data and the counterfactual scenario
plot_counterfactual <- ggplot() +
  # Add vertical zones for different eras
  geom_rect(data = eras, aes(xmin = Start, xmax = End, ymin = -Inf, ymax = Inf, fill = Era), alpha = 0.15) +
  # Plot colonial data points
  geom_point(data = colonial_data, aes(x = Year, y = Prison_Population), color = "black", size = 1) +
  # Plot post-colonial data points
  geom_point(data = ben_ali_data, aes(x = Year, y = Prison_Population), color = "black", size = 1) +
  # Add linear regression lines for colonial and post-colonial data
  geom_smooth(data = colonial_data, aes(x = Year, y = Prison_Population), method = "lm", se = TRUE, color = "blue", linetype = "dashed") +
  geom_smooth(data = ben_ali_data, aes(x = Year, y = Prison_Population), method = "lm", se = TRUE, color = "red", linetype = "solid") +
  # Add counterfactual scenario line
  geom_line(data = counterfactual_data, aes(x = Year, y = Prison_Population), color = "darkgreen", linetype = 4) +
  # Customize plot labels and theme
  labs(title = "Prison Population: Colonial vs. Post-Colonial Practices",
       x = "Year",
       y = "Prison Population (in thousands)") +
  theme_minimal() +
  scale_fill_manual(values = c("Colonial" = "grey", "Bourguiba" = "lightcoral", "Ben Ali" = "lightblue", "Democracy" = "lightgreen")) +
  guides(fill = guide_legend(title = "Era"))

plot_counterfactual
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Average Annual Increase in Carceral Capacity during Different Regimes

# Separate the colonial period into non-WWII and WWII periods
colonial_non_wwii_data <- subset(colonial_data, Year < 1939 | Year > 1945)
colonial_wwii_data <- subset(colonial_data, Year >= 1939 & Year <= 1945)

calculate_annual_increase <- function(data) {
  data <- data[order(data$Year), ]
  years <- diff(data$Year)
  populations <- diff(data$Prison_Population)
  
  annual_increase <- populations / years
  annual_increase_percent <- (annual_increase / head(data$Prison_Population, -1)) * 100
  
  avg_annual_increase <- mean(annual_increase)
  avg_annual_increase_percent <- mean(annual_increase_percent)
  
  list(
    avg_annual_increase = avg_annual_increase,
    avg_annual_increase_percent = avg_annual_increase_percent
  )
}

# Calculate for each period
colonial_non_wwii_increase <- calculate_annual_increase(colonial_non_wwii_data)
colonial_wwii_increase <- calculate_annual_increase(colonial_wwii_data)
colonial_increase <- calculate_annual_increase(colonial_data)
benali_increase <- calculate_annual_increase(subset(ben_ali_data, Year <= 2010))
post_benali_increase <- calculate_annual_increase(subset(ben_ali_data, Year > 2010))

# Create a summary data frame
increase_summary <- data.frame(
  Period = factor(c("Colonial (non-WWII)", "WWII", "Ben Ali", "Democracy"), 
                  levels = c("Colonial (non-WWII)", "WWII", "Ben Ali", "Democracy")),
  Avg_Annual_Increase = c(colonial_non_wwii_increase$avg_annual_increase, 
                          colonial_wwii_increase$avg_annual_increase, 
                          benali_increase$avg_annual_increase, 
                          post_benali_increase$avg_annual_increase),
  Avg_Annual_Increase_Percent = c(colonial_non_wwii_increase$avg_annual_increase_percent, 
                                  colonial_wwii_increase$avg_annual_increase_percent, 
                                  benali_increase$avg_annual_increase_percent, 
                                  post_benali_increase$avg_annual_increase_percent)
)

# Colors matching the previous chart
period_colors <- c("Colonial (non-WWII)" = "grey", "WWII" = "darkgrey", "Ben Ali" = "lightblue", "Democracy" = "lightgreen")

# Plot the average annual increase in actual numbers
plot_avg_increase <- ggplot(increase_summary, aes(x = Period, y = Avg_Annual_Increase*1000, fill = Period)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = period_colors) +
  labs(title = "Evolution of Carceral Population",
       x = "Period",
       y = "Average Annual Increase (Total Number)") +
  theme_minimal() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Plot the average annual increase in percentages
plot_avg_increase_percent <- ggplot(increase_summary, aes(x = Period, y = Avg_Annual_Increase_Percent, fill = Period)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = period_colors) +
  labs(title = "Evolution of Carceral Population",
       x = "Period",
       y = "Average Annual Increase (%)") +
  theme_minimal() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

grid.arrange(plot_avg_increase, plot_avg_increase_percent, ncol = 2)