library(readxl)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggrepel)
## Loading required package: ggplot2

Importing Political Stability

# Even-numbered years 2006-2024; the workbook has one "BTI <year>" sheet each.
years <- seq(from = 2006, to = 2024, by = 2)

# Read every yearly sheet of the BTI scores workbook into an unnamed list of
# tibbles, in the same order as `years`.
# NOTE(review): the workbook path is absolute and machine-specific.
BTI_list <- lapply(
  paste("BTI", years),
  function(sheet_name) {
    read_excel(
      path = "/Users/lilydorathy/Library/CloudStorage/OneDrive-UniversityofVirginia/2025-26/2026 Spring/STAT 4996 Capstone/Project/BTI_2006-2024_Scores.xlsx",
      sheet = sheet_name
    )
  }
)
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `S | Status Index` -> `S | Status Index...4`
## • `SI | Democracy Status` -> `SI | Democracy Status...5`
## • `SII | Economy Status` -> `SII | Economy Status...29`
## • `G | Governance Index` -> `G | Governance Index...52`
## • `Q13 | Level of Difficulty` -> `Q13 | Level of Difficulty...53`
## • `GII | Governance Performance` -> `GII | Governance Performance...60`
## • `` -> `...79`
## • `` -> `...84`
## • `` -> `...88`
## • `` -> `...92`
## • `` -> `...93`
## • `` -> `...104`
## • `S | Status Index` -> `S | Status Index...106`
## • `Category` -> `Category...107`
## • `` -> `...108`
## • `SI | Democracy Status` -> `SI | Democracy Status...109`
## • `Category` -> `Category...110`
## • `` -> `...111`
## • `SII | Economy Status` -> `SII | Economy Status...112`
## • `Category` -> `Category...113`
## • `` -> `...114`
## • `G | Governance Index` -> `G | Governance Index...115`
## • `Category` -> `Category...116`
## • `` -> `...117`
## • `Q13 | Level of Difficulty` -> `Q13 | Level of Difficulty...118`
## • `Category` -> `Category...119`
## • `` -> `...120`
## • `GII | Governance Performance` -> `GII | Governance Performance...121`
## • `Category` -> `Category...122`
## • `` -> `...123`
# Label each raw sheet with its year, e.g. "BTI_2006".
names(BTI_list) <- paste0("BTI_", years)

# Reduce one raw BTI sheet to the indicators used in this analysis.
#   sheet: tibble read from one "BTI <year>" worksheet
#   wave:  the year that worksheet belongs to
# Returns a tibble with country_name, region, year and the selected scores.
clean_bti_sheet <- function(sheet, wave) {
  # The first column holds the country name, whatever its header is.
  named <- rename(sheet, country_name = 1)

  indicators <- select(
    named,
    country_name,
    region = Region,

    # broad indices (the "...N" suffixes come from readxl's name repair)
    status_index              = `S | Status Index...4`,
    democracy_status          = `SI | Democracy Status...5`,
    economy_status            = `SII | Economy Status...29`,
    governance_index          = `G | Governance Index...52`,
    governance_perf           = `GII | Governance Performance...60`,

    # political components
    stateness                 = `Q1 | Stateness`,
    political_participation   = `Q2 | Political Participation`,
    rule_of_law               = `Q3 | Rule of Law`,
    democratic_stability      = `Q4 | Stability of Democratic Institutions`,
    political_integration     = `Q5 | Political and Social Integration`,

    # governance components
    civil_society             = `Q13.2 | Civil society traditions`,
    conflict_intensity        = `Q13.3 | Conflict intensity`,
    steering_capability       = `Q14 | Steering Capability`,
    resource_efficiency       = `Q15 | Resource Efficiency`,
    corruption_policy         = `Q15.3 | Anti-corruption policy`,
    consensus_building        = `Q16 | Consensus-Building`,
    conflict_management       = `Q16.3 | Cleavage / conflict management`,
    international_cooperation = `Q17 | International Cooperation`,
    credibility               = `Q17.2 | Credibility`
  )

  # Convert every score column to numeric, reading "-" as missing, and tag
  # the rows with the year they belong to.
  scored <- mutate(
    indicators,
    across(
      -c(country_name, region),
      function(col) readr::parse_number(as.character(col), na = "-")
    ),
    year = wave
  )

  relocate(scored, year, .after = region)
}

# Clean each sheet, pairing it with its year.
BTI_clean_list <- Map(clean_bti_sheet, BTI_list, years)

names(BTI_clean_list) <- paste0("BTI_", years)

# Stack the per-year tables into one long country-year panel, then display it.
BTI_clean_panel <- BTI_clean_list %>%
  bind_rows()

BTI_clean_panel
## # A tibble: 1,370 × 22
##    country_name region  year status_index democracy_status economy_status
##    <chr>         <dbl> <dbl>        <dbl>            <dbl>          <dbl>
##  1 Afghanistan       7  2006         3.02             2.97           3.07
##  2 Albania           1  2006         6.61             7.25           5.96
##  3 Algeria           4  2006         4.42             4.23           4.61
##  4 Angola            5  2006         3.41             3.38           3.43
##  5 Argentina         2  2006         7.21             7.85           6.57
##  6 Armenia           6  2006         5.81             5.2            6.43
##  7 Azerbaijan        6  2006         4.51             3.8            5.21
##  8 Bahrain           4  2006         6.21             4.92           7.5 
##  9 Bangladesh        7  2006         6.01             6.55           5.46
## 10 Belarus           6  2006         4.47             3.97           4.96
## # ℹ 1,360 more rows
## # ℹ 16 more variables: governance_index <dbl>, governance_perf <dbl>,
## #   stateness <dbl>, political_participation <dbl>, rule_of_law <dbl>,
## #   democratic_stability <dbl>, political_integration <dbl>,
## #   civil_society <dbl>, conflict_intensity <dbl>, steering_capability <dbl>,
## #   resource_efficiency <dbl>, corruption_policy <dbl>,
## #   consensus_building <dbl>, conflict_management <dbl>, …
#write_excel_csv(BTI_clean_panel, file = "BTI_clean_panel.csv")

Importing Education Quality

# Load the education-quality scores from the working directory.
Lee_Lee_2025 <- read_csv(file = "Lee_Lee_2025.csv")
## Rows: 5454 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CountryName, CountryCode
## dbl (8): Year, Observed_data_flag, Tscore_INT, Tscore_ML, Tscore1519_INT, Ts...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep 2006 onwards and only the join keys plus the Tscore1519_ML score,
# renaming the keys to match the BTI panel.
quality_clean <- Lee_Lee_2025 %>%
  filter(Year >= 2006) %>%
  select(
    country_name = CountryName,
    year = Year,
    Tscore1519_ML
  )

Importing Education Quantity

# Load the enrolment / completion rates file from the working directory.
quantity <- read_csv(file = "primary-secondary-enrolment-completion-rates.csv")
## Rows: 10628 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (6): Year, Primary enrolment, Secondary enrolment, Tertiary enrolment, P...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Standardise column names, keep 2006 onwards, and retain only the join keys
# plus the three enrolment rates. The year filter is applied once; the
# original repeated the identical filter after select(), which was a no-op.
quantity_clean <- quantity %>%
  clean_names() %>%
  rename(country_name = entity) %>%
  filter(year >= 2006) %>%
  select(
    country_name,
    year,
    primary_enrolment,
    secondary_enrolment,
    tertiary_enrolment
  )

# List the entity names to compare spellings against the BTI country names.
# The list includes aggregates such as "World" and income groups.
unique(quantity_clean$country_name)
##   [1] "Afghanistan"                                        
##   [2] "Albania"                                            
##   [3] "Algeria"                                            
##   [4] "Andorra"                                            
##   [5] "Angola"                                             
##   [6] "Anguilla"                                           
##   [7] "Antigua and Barbuda"                                
##   [8] "Argentina"                                          
##   [9] "Armenia"                                            
##  [10] "Aruba"                                              
##  [11] "Australia"                                          
##  [12] "Austria"                                            
##  [13] "Azerbaijan"                                         
##  [14] "Bahamas"                                            
##  [15] "Bahrain"                                            
##  [16] "Bangladesh"                                         
##  [17] "Barbados"                                           
##  [18] "Belarus"                                            
##  [19] "Belgium"                                            
##  [20] "Belize"                                             
##  [21] "Benin"                                              
##  [22] "Bermuda"                                            
##  [23] "Bhutan"                                             
##  [24] "Bolivia"                                            
##  [25] "Bosnia and Herzegovina"                             
##  [26] "Botswana"                                           
##  [27] "Brazil"                                             
##  [28] "British Virgin Islands"                             
##  [29] "Brunei"                                             
##  [30] "Bulgaria"                                           
##  [31] "Burkina Faso"                                       
##  [32] "Burundi"                                            
##  [33] "Cambodia"                                           
##  [34] "Cameroon"                                           
##  [35] "Canada"                                             
##  [36] "Cape Verde"                                         
##  [37] "Cayman Islands"                                     
##  [38] "Central African Republic"                           
##  [39] "Central and Southern Asia (SDG)"                    
##  [40] "Chad"                                               
##  [41] "Chile"                                              
##  [42] "China"                                              
##  [43] "Colombia"                                           
##  [44] "Comoros"                                            
##  [45] "Congo"                                              
##  [46] "Cook Islands"                                       
##  [47] "Costa Rica"                                         
##  [48] "Cote d'Ivoire"                                      
##  [49] "Croatia"                                            
##  [50] "Cuba"                                               
##  [51] "Curacao"                                            
##  [52] "Cyprus"                                             
##  [53] "Czechia"                                            
##  [54] "Democratic Republic of Congo"                       
##  [55] "Denmark"                                            
##  [56] "Djibouti"                                           
##  [57] "Dominica"                                           
##  [58] "Dominican Republic"                                 
##  [59] "East Asia and Pacific (WB)"                         
##  [60] "East Timor"                                         
##  [61] "Eastern and South-Eastern Asia (SDG)"               
##  [62] "Ecuador"                                            
##  [63] "Egypt"                                              
##  [64] "El Salvador"                                        
##  [65] "Equatorial Guinea"                                  
##  [66] "Eritrea"                                            
##  [67] "Estonia"                                            
##  [68] "Eswatini"                                           
##  [69] "Ethiopia"                                           
##  [70] "Europe and Central Asia (WB)"                       
##  [71] "Europe and Northern America (SDG)"                  
##  [72] "European Union (27)"                                
##  [73] "Fiji"                                               
##  [74] "Finland"                                            
##  [75] "France"                                             
##  [76] "Gabon"                                              
##  [77] "Gambia"                                             
##  [78] "Georgia"                                            
##  [79] "Germany"                                            
##  [80] "Ghana"                                              
##  [81] "Gibraltar"                                          
##  [82] "Greece"                                             
##  [83] "Grenada"                                            
##  [84] "Guatemala"                                          
##  [85] "Guinea"                                             
##  [86] "Guinea-Bissau"                                      
##  [87] "Guyana"                                             
##  [88] "Haiti"                                              
##  [89] "High-income countries"                              
##  [90] "Honduras"                                           
##  [91] "Hong Kong"                                          
##  [92] "Hungary"                                            
##  [93] "Iceland"                                            
##  [94] "India"                                              
##  [95] "Indonesia"                                          
##  [96] "Iran"                                               
##  [97] "Iraq"                                               
##  [98] "Ireland"                                            
##  [99] "Israel"                                             
## [100] "Italy"                                              
## [101] "Jamaica"                                            
## [102] "Japan"                                              
## [103] "Jordan"                                             
## [104] "Kazakhstan"                                         
## [105] "Kenya"                                              
## [106] "Kiribati"                                           
## [107] "Kuwait"                                             
## [108] "Kyrgyzstan"                                         
## [109] "Laos"                                               
## [110] "Latin America and Caribbean (WB)"                   
## [111] "Latin America and the Caribbean (SDG)"              
## [112] "Latvia"                                             
## [113] "Lebanon"                                            
## [114] "Lesotho"                                            
## [115] "Liberia"                                            
## [116] "Libya"                                              
## [117] "Liechtenstein"                                      
## [118] "Lithuania"                                          
## [119] "Low-income countries"                               
## [120] "Lower-middle-income countries"                      
## [121] "Luxembourg"                                         
## [122] "Macao"                                              
## [123] "Madagascar"                                         
## [124] "Malawi"                                             
## [125] "Malaysia"                                           
## [126] "Maldives"                                           
## [127] "Mali"                                               
## [128] "Malta"                                              
## [129] "Marshall Islands"                                   
## [130] "Mauritania"                                         
## [131] "Mauritius"                                          
## [132] "Mexico"                                             
## [133] "Micronesia (country)"                               
## [134] "Middle East and North Africa (WB)"                  
## [135] "Middle-income countries"                            
## [136] "Moldova"                                            
## [137] "Monaco"                                             
## [138] "Mongolia"                                           
## [139] "Montenegro"                                         
## [140] "Montserrat"                                         
## [141] "Morocco"                                            
## [142] "Mozambique"                                         
## [143] "Myanmar"                                            
## [144] "Namibia"                                            
## [145] "Nauru"                                              
## [146] "Nepal"                                              
## [147] "Netherlands"                                        
## [148] "New Caledonia"                                      
## [149] "New Zealand"                                        
## [150] "Nicaragua"                                          
## [151] "Niger"                                              
## [152] "Nigeria"                                            
## [153] "Niue"                                               
## [154] "North America (WB)"                                 
## [155] "North Korea"                                        
## [156] "North Macedonia"                                    
## [157] "Northern Africa and Western Asia (SDG)"             
## [158] "Norway"                                             
## [159] "Oceania (excluding Australia and New Zealand) (SDG)"
## [160] "Oman"                                               
## [161] "Pakistan"                                           
## [162] "Palau"                                              
## [163] "Palestine"                                          
## [164] "Panama"                                             
## [165] "Papua New Guinea"                                   
## [166] "Paraguay"                                           
## [167] "Peru"                                               
## [168] "Philippines"                                        
## [169] "Poland"                                             
## [170] "Portugal"                                           
## [171] "Puerto Rico"                                        
## [172] "Qatar"                                              
## [173] "Romania"                                            
## [174] "Russia"                                             
## [175] "Rwanda"                                             
## [176] "Saint Kitts and Nevis"                              
## [177] "Saint Lucia"                                        
## [178] "Saint Vincent and the Grenadines"                   
## [179] "Samoa"                                              
## [180] "San Marino"                                         
## [181] "Sao Tome and Principe"                              
## [182] "Saudi Arabia"                                       
## [183] "Senegal"                                            
## [184] "Serbia"                                             
## [185] "Seychelles"                                         
## [186] "Sierra Leone"                                       
## [187] "Singapore"                                          
## [188] "Sint Maarten (Dutch part)"                          
## [189] "Slovakia"                                           
## [190] "Slovenia"                                           
## [191] "Solomon Islands"                                    
## [192] "Somalia"                                            
## [193] "South Africa"                                       
## [194] "South Asia (WB)"                                    
## [195] "South Korea"                                        
## [196] "South Sudan"                                        
## [197] "Spain"                                              
## [198] "Sri Lanka"                                          
## [199] "Sub-Saharan Africa (SDG)"                           
## [200] "Sub-Saharan Africa (WB)"                            
## [201] "Sudan"                                              
## [202] "Suriname"                                           
## [203] "Sweden"                                             
## [204] "Switzerland"                                        
## [205] "Syria"                                              
## [206] "Tajikistan"                                         
## [207] "Tanzania"                                           
## [208] "Thailand"                                           
## [209] "Togo"                                               
## [210] "Tokelau"                                            
## [211] "Tonga"                                              
## [212] "Trinidad and Tobago"                                
## [213] "Tunisia"                                            
## [214] "Turkey"                                             
## [215] "Turkmenistan"                                       
## [216] "Turks and Caicos Islands"                           
## [217] "Tuvalu"                                             
## [218] "Uganda"                                             
## [219] "Ukraine"                                            
## [220] "United Arab Emirates"                               
## [221] "United Kingdom"                                     
## [222] "United States"                                      
## [223] "Upper-middle-income countries"                      
## [224] "Uruguay"                                            
## [225] "Uzbekistan"                                         
## [226] "Vanuatu"                                            
## [227] "Venezuela"                                          
## [228] "Vietnam"                                            
## [229] "World"                                              
## [230] "Yemen"                                              
## [231] "Zambia"                                             
## [232] "Zimbabwe"

Merging

# Attach enrolment rates and then test scores to every BTI country-year.
# Left joins keep all BTI rows; unmatched rows get NA in the added columns.
# NOTE(review): matching is on the literal country_name string, so spelling
# differences between sources silently produce NAs -- confirm coverage.
merged <- local({
  keys <- c("country_name", "year")
  with_enrolment <- left_join(BTI_clean_panel, quantity_clean, by = keys)
  left_join(with_enrolment, quality_clean, by = keys)
})

names(merged)
##  [1] "country_name"              "region"                   
##  [3] "year"                      "status_index"             
##  [5] "democracy_status"          "economy_status"           
##  [7] "governance_index"          "governance_perf"          
##  [9] "stateness"                 "political_participation"  
## [11] "rule_of_law"               "democratic_stability"     
## [13] "political_integration"     "civil_society"            
## [15] "conflict_intensity"        "steering_capability"      
## [17] "resource_efficiency"       "corruption_policy"        
## [19] "consensus_building"        "conflict_management"      
## [21] "international_cooperation" "credibility"              
## [23] "primary_enrolment"         "secondary_enrolment"      
## [25] "tertiary_enrolment"        "Tscore1519_ML"
# Save the merged panel to the working directory.
write_excel_csv(x = merged, file = "merged.csv")

Checks

# Any country-year appearing more than once means a join fanned out.
# An empty result is the expected outcome.
filter(
  count(merged, country_name, year),
  n > 1
)
## # A tibble: 0 × 3
## # ℹ 3 variables: country_name <chr>, year <dbl>, n <int>
# Earliest and latest year present after merging.
range(merged[["year"]])
## [1] 2006 2024
# Number of missing values in each column of the merged panel.
merged %>%
  is.na() %>%
  colSums()
##              country_name                    region                      year 
##                         0                         0                         0 
##              status_index          democracy_status            economy_status 
##                        72                        72                        72 
##          governance_index           governance_perf                 stateness 
##                        73                        73                        72 
##   political_participation               rule_of_law      democratic_stability 
##                        72                        72                        72 
##     political_integration             civil_society        conflict_intensity 
##                        72                        72                        72 
##       steering_capability       resource_efficiency         corruption_policy 
##                        73                        73                        73 
##        consensus_building       conflict_management international_cooperation 
##                        73                        73                        73 
##               credibility         primary_enrolment       secondary_enrolment 
##                        73                       390                       557 
##        tertiary_enrolment             Tscore1519_ML 
##                       533                       821
# Keep only country-years that have both BTI indices, the test score, and all
# three enrolment rates.
merged_clean <- drop_na(
  merged,
  status_index,
  governance_index,
  Tscore1519_ML,
  primary_enrolment,
  secondary_enrolment,
  tertiary_enrolment
)

# Re-count missing values per column after the drop.
merged_clean %>%
  is.na() %>%
  colSums()
##              country_name                    region                      year 
##                         0                         0                         0 
##              status_index          democracy_status            economy_status 
##                         0                         0                         0 
##          governance_index           governance_perf                 stateness 
##                         0                         0                         0 
##   political_participation               rule_of_law      democratic_stability 
##                         0                         0                         0 
##     political_integration             civil_society        conflict_intensity 
##                         0                         0                         0 
##       steering_capability       resource_efficiency         corruption_policy 
##                         0                         0                         0 
##        consensus_building       conflict_management international_cooperation 
##                         0                         0                         0 
##               credibility         primary_enrolment       secondary_enrolment 
##                         0                         0                         0 
##        tertiary_enrolment             Tscore1519_ML 
##                         0                         0
# Compact overview of the analysis sample: dimensions, types, first values.
merged_clean %>%
  glimpse()
## Rows: 398
## Columns: 26
## $ country_name              <chr> "Albania", "Algeria", "Argentina", "Armenia"…
## $ region                    <dbl> 1, 4, 2, 6, 4, 5, 1, 7, 2, 2, 1, 1, 2, 1, 6,…
## $ year                      <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 20…
## $ status_index              <dbl> 6.607143, 4.420238, 7.210714, 5.814286, 6.20…
## $ democracy_status          <dbl> 7.250000, 4.233333, 7.850000, 5.200000, 4.91…
## $ economy_status            <dbl> 5.964286, 4.607143, 6.571429, 6.428571, 7.50…
## $ governance_index          <dbl> 5.331435, 3.648966, 5.514074, 5.138765, 4.85…
## $ governance_perf           <dbl> 6.016667, 3.966667, 6.400000, 5.866667, 5.66…
## $ stateness                 <dbl> 8.00, 7.00, 8.75, 8.75, 8.25, 9.00, 9.25, 7.…
## $ political_participation   <dbl> 8.00, 4.25, 9.50, 5.00, 4.00, 9.00, 9.25, 4.…
## $ rule_of_law               <dbl> 5.50, 4.25, 6.00, 4.25, 5.00, 8.00, 7.75, 3.…
## $ democratic_stability      <dbl> 7.5, 2.0, 8.0, 3.0, 2.0, 9.0, 9.0, 2.0, 9.5,…
## $ political_integration     <dbl> 7.250000, 3.666667, 7.000000, 5.000000, 5.33…
## $ civil_society             <dbl> 7, 7, 4, 4, 5, 4, 4, 10, 4, 5, 4, 2, 5, 2, 7…
## $ conflict_intensity        <dbl> 2, 7, 4, 3, 5, 1, 2, 5, 3, 9, 2, 1, 3, 3, 6,…
## $ steering_capability       <dbl> 5.666667, 4.333333, 6.000000, 5.666667, 5.00…
## $ resource_efficiency       <dbl> 4.333333, 2.666667, 5.333333, 5.000000, 6.00…
## $ corruption_policy         <dbl> 4, 2, 5, 5, 6, 8, 6, 2, 9, 5, 7, 7, 5, 8, 7,…
## $ consensus_building        <dbl> 6.40, 4.20, 7.60, 5.80, 5.00, 8.50, 8.75, 3.…
## $ conflict_management       <dbl> 5, 6, 7, 6, 7, 8, 8, 5, 9, 5, 8, 9, 7, 8, 7,…
## $ international_cooperation <dbl> 7.666667, 4.666667, 6.666667, 7.000000, 6.66…
## $ credibility               <dbl> 6, 5, 5, 8, 7, 9, 8, 6, 10, 8, 9, 10, 10, 10…
## $ primary_enrolment         <dbl> 105.47778, 112.07709, 116.18785, 103.89670, …
## $ secondary_enrolment       <dbl> 84.32577, 83.48119, 92.40758, 95.26765, 83.5…
## $ tertiary_enrolment        <dbl> 26.21010, 21.34580, 66.94447, 41.59036, 17.5…
## $ Tscore1519_ML             <dbl> 414.6, 398.2, 415.0, 476.7, 424.3, 363.7, 47…
# Mean education-quality score per year, computed once and reused for both
# the line chart and the printed table so the two cannot drift apart.
# (The original repeated the same group_by/summarise for each output.)
quality_by_year <- merged_clean %>%
  group_by(year) %>%
  summarise(
    mean_quality = mean(Tscore1519_ML)
  )

# Trend of the yearly mean.
ggplot(quality_by_year, aes(year, mean_quality)) +
  geom_line(linewidth = 1) +
  theme_minimal() +
  labs(
    title = "Average Education Quality Over Time",
    y = "Tscore1519_ML"
  )

# The same yearly means as a table.
quality_by_year
## # A tibble: 9 × 2
##    year mean_quality
##   <dbl>        <dbl>
## 1  2006         438.
## 2  2008         436.
## 3  2010         435.
## 4  2012         438.
## 5  2014         437.
## 6  2016         441.
## 7  2018         442.
## 8  2020         437.
## 9  2022         435.
 # Five-number summary of the education-quality score for 2010 only.
 # The column is extracted with pull() before summarising. Piping the data
 # frame into summary(Tscore1519_ML) passed the column name as the `maxsum`
 # argument of summary() for data frames, so every column was summarised.
 merged_clean %>%
   filter(year == 2010) %>%
   pull(Tscore1519_ML) %>%
   summary()
##  country_name           region           year       status_index  
##  Length:43          Min.   :1.000   Min.   :2010   Min.   :3.254  
##  Class :character   1st Qu.:1.000   1st Qu.:2010   1st Qu.:5.950  
##  Mode  :character   Median :2.000   Median :2010   Median :7.139  
##                     Mean   :3.023   Mean   :2010   Mean   :6.970  
##                     3rd Qu.:5.000   3rd Qu.:2010   3rd Qu.:8.268  
##                     Max.   :7.000   Max.   :2010   Max.   :9.650  
##  democracy_status economy_status  governance_index governance_perf
##  Min.   :2.900    Min.   :3.607   Min.   :2.098    Min.   :2.300  
##  1st Qu.:5.317    1st Qu.:6.143   1st Qu.:4.827    1st Qu.:5.542  
##  Median :7.550    Median :6.786   Median :5.891    Median :6.717  
##  Mean   :7.007    Mean   :6.933   Mean   :5.680    Mean   :6.622  
##  3rd Qu.:8.500    3rd Qu.:7.964   3rd Qu.:6.534    3rd Qu.:7.942  
##  Max.   :9.900    Max.   :9.500   Max.   :7.558    Max.   :9.233  
##    stateness      political_participation  rule_of_law     democratic_stability
##  Min.   : 6.750   Min.   : 2.000          Min.   : 2.250   Min.   : 1.000      
##  1st Qu.: 8.000   1st Qu.: 4.875          1st Qu.: 5.250   1st Qu.: 3.000      
##  Median : 8.750   Median : 8.250          Median : 6.250   Median : 7.500      
##  Mean   : 8.529   Mean   : 7.267          Mean   : 6.483   Mean   : 6.523      
##  3rd Qu.: 9.250   3rd Qu.: 9.375          3rd Qu.: 8.250   3rd Qu.: 8.500      
##  Max.   :10.000   Max.   :10.000          Max.   :10.000   Max.   :10.000      
##  political_integration civil_society   conflict_intensity steering_capability
##  Min.   :2.000         Min.   :1.000   Min.   :1.000      Min.   :2.000      
##  1st Qu.:5.000         1st Qu.:4.000   1st Qu.:2.000      1st Qu.:5.167      
##  Median :6.750         Median :5.000   Median :3.000      Median :6.333      
##  Mean   :6.231         Mean   :4.791   Mean   :3.512      Mean   :6.256      
##  3rd Qu.:7.500         3rd Qu.:6.000   3rd Qu.:4.500      3rd Qu.:7.500      
##  Max.   :9.500         Max.   :9.000   Max.   :8.000      Max.   :9.333      
##  resource_efficiency corruption_policy consensus_building conflict_management
##  Min.   :2.333       Min.   :2.000     Min.   :2.200      Min.   :2.000      
##  1st Qu.:4.667       1st Qu.:4.000     1st Qu.:4.900      1st Qu.:5.500      
##  Median :5.667       Median :5.000     Median :7.200      Median :7.000      
##  Mean   :5.791       Mean   :5.256     Mean   :6.613      Mean   :6.279      
##  3rd Qu.:6.667       3rd Qu.:6.500     3rd Qu.:8.000      3rd Qu.:7.000      
##  Max.   :8.667       Max.   :9.000     Max.   :9.600      Max.   :9.000      
##  international_cooperation  credibility     primary_enrolment
##  Min.   : 2.667            Min.   : 2.000   Min.   : 88.71   
##  1st Qu.: 7.000            1st Qu.: 6.500   1st Qu.: 98.64   
##  Median : 8.333            Median : 8.000   Median :100.99   
##  Mean   : 7.829            Mean   : 7.721   Mean   :103.46   
##  3rd Qu.: 8.667            3rd Qu.: 9.000   3rd Qu.:109.12   
##  Max.   :10.000            Max.   :10.000   Max.   :124.38   
##  secondary_enrolment tertiary_enrolment Tscore1519_ML  
##  Min.   : 52.19      Min.   : 8.857     Min.   :271.0  
##  1st Qu.: 85.49      1st Qu.:31.907     1st Qu.:401.4  
##  Median : 92.61      Median :46.723     Median :444.9  
##  Mean   : 90.88      Mean   :47.067     Mean   :435.1  
##  3rd Qu.: 99.03      3rd Qu.:62.841     3rd Qu.:476.2  
##  Max.   :111.03      Max.   :90.251     Max.   :540.8
# Collapse the country-year rows of merged_clean to one row per country:
# mean status index, mean governance index, mean test score (Tscore1519_ML),
# and the number of rows contributing to each country.
country_avg <- summarise(
  group_by(merged_clean, country_name),
  avg_status_index     = mean(status_index, na.rm = TRUE),
  avg_governance_index = mean(governance_index, na.rm = TRUE),
  avg_quality          = mean(Tscore1519_ML, na.rm = TRUE),
  n_years              = n(),
  .groups = "drop"
)

# Country-level scatter: average governance (x) against average status (y),
# points and labels coloured by average test score, with one overall
# least-squares line drawn in black.
ggplot(
  data = country_avg,
  mapping = aes(
    x = avg_governance_index,
    y = avg_status_index,
    color = avg_quality
  )
) +
  geom_point(size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  # Repelled labels; labels that would overlap too much are dropped.
  geom_text_repel(mapping = aes(label = country_name), size = 3, max.overlaps = 10) +
  labs(
    title = "Average Governance vs Status Index (Colored by Education Quality)",
    x = "Average BTI Governance Index",
    y = "Average BTI Status Index",
    color = "Avg Education Quality\n(Tscore1519_ML)"
  ) +
  scale_color_viridis_c() +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Pooled OLS on the country-year rows: test score regressed on the two BTI
# indices, a linear year term, and primary enrolment.
# NOTE(review): status_index and governance_index enter together; check how
# strongly they are correlated before interpreting the opposite-signed
# coefficients.
education <- lm(
  Tscore1519_ML ~ status_index + governance_index + year + primary_enrolment,
  data = merged_clean
)

summary(education)
## 
## Call:
## lm(formula = Tscore1519_ML ~ status_index + governance_index + 
##     year + primary_enrolment, data = merged_clean)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -165.216  -27.423    8.904   29.655  193.122 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        321.7823  1065.6487   0.302    0.763    
## status_index        35.3012     3.7036   9.532  < 2e-16 ***
## governance_index   -24.4466     4.6552  -5.252 2.48e-07 ***
## year                 0.1006     0.5219   0.193    0.847    
## primary_enrolment   -1.8752     0.4108  -4.565 6.70e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 50.83 on 393 degrees of freedom
## Multiple R-squared:  0.3278, Adjusted R-squared:  0.321 
## F-statistic: 47.92 on 4 and 393 DF,  p-value: < 2.2e-16
# Print the per-country averages for a quick visual check.
country_avg
## # A tibble: 58 × 5
##    country_name        avg_status_index avg_governance_index avg_quality n_years
##    <chr>                          <dbl>                <dbl>       <dbl>   <int>
##  1 Albania                         6.83                 5.71        422.       9
##  2 Algeria                         4.77                 4.09        399.       3
##  3 Argentina                       7.05                 5.52        412.       9
##  4 Armenia                         6.05                 4.86        473.       7
##  5 Azerbaijan                      4.54                 3.95        442.       7
##  6 Bahrain                         5.25                 4.01        447.       6
##  7 Belarus                         4.52                 3.10        483.       8
##  8 Bosnia and Herzego…             6.30                 4.11        438.       8
##  9 Botswana                        7.96                 7.36        363.       2
## 10 Brazil                          7.49                 6.21        414.       6
## # ℹ 48 more rows
# Clustering inputs: the three country-level averages, then centred and
# scaled column-wise so that no variable dominates the distance through its
# units.
cluster_vars <- select(
  country_avg,
  avg_governance_index,
  avg_status_index,
  avg_quality
)

cluster_scaled <- scale(cluster_vars)

# Elbow diagnostic: total within-cluster sum of squares for k = 1..10.
# The seed is fixed because kmeans() draws random starting centres.
set.seed(123)

# vapply() (rather than sapply()) guarantees a plain numeric vector with
# exactly one value per k and fails loudly if a fit returns anything else.
wss <- vapply(
  1:10,
  function(k) kmeans(cluster_scaled, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)

# The x positions are derived from wss so the axis always matches the number
# of fitted k values.
plot(seq_along(wss), wss, type = "b",
     xlab = "Number of clusters (k)",
     ylab = "Within-cluster sum of squares")

# Final three-cluster solution on the scaled averages, using 50 random starts.
# No seed is set here, so the random starts continue from whatever RNG state
# the preceding code left behind; the cluster numbers are arbitrary labels.
k3 <- kmeans(x = cluster_scaled, centers = 3, nstart = 50)

# Attach the cluster assignment to each country as a factor.
country_avg[["cluster"]] <- factor(k3[["cluster"]])

# Per-cluster means of the three (unscaled) clustering variables.
aggregate(
  select(country_avg, avg_governance_index, avg_status_index, avg_quality),
  by = list(cluster = country_avg$cluster),
  FUN = mean
)
##   cluster avg_governance_index avg_status_index avg_quality
## 1       1             6.700441         8.582834    497.1411
## 2       2             4.071320         4.995917    416.6836
## 3       3             5.833652         6.741902    402.4422
# Countries plotted by average governance (x) and average test score (y),
# coloured by their k-means cluster.
ggplot(
  data = country_avg,
  mapping = aes(x = avg_governance_index, y = avg_quality, color = cluster)
) +
  geom_point(size = 3, alpha = 0.8) +
  labs(
    title = "Country Clusters by Governance and Education Quality",
    x = "Average Governance Index",
    y = "Average Education Quality"
  ) +
  theme_minimal()

# Number of rows in the analysis data set.
nrow(merged_clean)
## [1] 398
# Number of distinct countries in the analysis data set.
merged_clean$country_name %>%
  unique() %>%
  length()
## [1] 58
# Column-wise summary (range, quartiles, mean) of every variable.
summary(merged_clean)
##  country_name           region           year       status_index  
##  Length:398         Min.   :1.000   Min.   :2006   Min.   :3.014  
##  Class :character   1st Qu.:1.000   1st Qu.:2010   1st Qu.:5.957  
##  Mode  :character   Median :2.000   Median :2014   Median :6.782  
##                     Mean   :3.251   Mean   :2014   Mean   :6.848  
##                     3rd Qu.:6.000   3rd Qu.:2018   3rd Qu.:8.057  
##                     Max.   :7.000   Max.   :2022   Max.   :9.650  
##  democracy_status economy_status  governance_index governance_perf
##  Min.   :2.450    Min.   :3.179   Min.   :1.778    Min.   :1.950  
##  1st Qu.:5.200    1st Qu.:6.009   1st Qu.:4.780    1st Qu.:5.421  
##  Median :7.100    Median :6.714   Median :5.774    Median :6.638  
##  Mean   :6.775    Mean   :6.922   Mean   :5.602    Mean   :6.526  
##  3rd Qu.:8.350    3rd Qu.:7.929   3rd Qu.:6.563    3rd Qu.:7.758  
##  Max.   :9.950    Max.   :9.786   Max.   :7.694    Max.   :9.400  
##    stateness      political_participation  rule_of_law     democratic_stability
##  Min.   : 5.250   Min.   : 1.250          Min.   : 1.750   Min.   : 1.000      
##  1st Qu.: 7.500   1st Qu.: 4.500          1st Qu.: 4.750   1st Qu.: 3.000      
##  Median : 8.500   Median : 7.750          Median : 6.250   Median : 7.000      
##  Mean   : 8.336   Mean   : 6.876          Mean   : 6.263   Mean   : 6.317      
##  3rd Qu.: 9.250   3rd Qu.: 9.000          3rd Qu.: 7.750   3rd Qu.: 8.500      
##  Max.   :10.000   Max.   :10.000          Max.   :10.000   Max.   :10.000      
##  political_integration civil_society    conflict_intensity steering_capability
##  Min.   :2.000         Min.   : 1.000   Min.   :1.000      Min.   :1.667      
##  1st Qu.:5.000         1st Qu.: 4.000   1st Qu.:3.000      1st Qu.:5.000      
##  Median :6.500         Median : 5.000   Median :3.000      Median :6.333      
##  Mean   :6.081         Mean   : 4.912   Mean   :3.693      Mean   :6.241      
##  3rd Qu.:7.250         3rd Qu.: 6.000   3rd Qu.:5.000      3rd Qu.:7.333      
##  Max.   :9.750         Max.   :10.000   Max.   :9.000      Max.   :9.667      
##  resource_efficiency corruption_policy consensus_building conflict_management
##  Min.   :2.000       Min.   :1.000     Min.   :1.800      Min.   : 1.000     
##  1st Qu.:4.667       1st Qu.:4.000     1st Qu.:4.800      1st Qu.: 5.000     
##  Median :5.667       Median :5.000     Median :6.600      Median : 7.000     
##  Mean   :5.837       Mean   :5.379     Mean   :6.386      Mean   : 6.219     
##  3rd Qu.:7.000       3rd Qu.:7.000     3rd Qu.:8.200      3rd Qu.: 8.000     
##  Max.   :9.333       Max.   :9.000     Max.   :9.800      Max.   :10.000     
##  international_cooperation  credibility     primary_enrolment
##  Min.   : 2.333            Min.   : 2.000   Min.   : 82.21   
##  1st Qu.: 6.667            1st Qu.: 6.000   1st Qu.: 98.23   
##  Median : 7.667            Median : 8.000   Median :101.01   
##  Mean   : 7.639            Mean   : 7.457   Mean   :102.12   
##  3rd Qu.: 9.000            3rd Qu.: 9.000   3rd Qu.:105.66   
##  Max.   :10.000            Max.   :10.000   Max.   :128.40   
##  secondary_enrolment tertiary_enrolment Tscore1519_ML  
##  Min.   : 39.14      Min.   :  4.773    Min.   :271.0  
##  1st Qu.: 85.86      1st Qu.: 34.894    1st Qu.:406.1  
##  Median : 96.52      Median : 50.459    Median :440.9  
##  Mean   : 93.25      Mean   : 50.681    Mean   :437.7  
##  3rd Qu.:103.01      3rd Qu.: 67.731    3rd Qu.:476.2  
##  Max.   :125.96      Max.   :107.104    Max.   :610.7
# Names of the ten countries with the most rows in merged_clean
# (count() sorts by descending frequency before the first ten are taken).
top_countries <- pull(
  slice_head(count(merged_clean, country_name, sort = TRUE), n = 10),
  country_name
)

# Test-score trajectories over time, one line per country, restricted to the
# countries listed in top_countries.
ggplot(
  data = filter(merged_clean, country_name %in% top_countries),
  mapping = aes(
    x = year,
    y = Tscore1519_ML,
    group = country_name,
    color = country_name
  )
) +
  geom_line() +
  theme_minimal()

# Pooled scatter of test score against governance index with a single
# least-squares line (no confidence band).
ggplot(
  data = merged_clean,
  mapping = aes(x = governance_index, y = Tscore1519_ML)
) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# One histogram per numeric column: stack all numeric variables into
# (variable, value) pairs and facet by variable with independent axes.
# Every numeric column is included, so numerically coded columns get a
# panel too.
merged_clean %>%
  select(where(is.numeric)) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
  ggplot(mapping = aes(x = value)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free")

library(dplyr)
library(corrplot)
## corrplot 0.95 loaded
# Pairwise correlation matrix of the selected BTI indicators. Each pair uses
# all rows where both variables are observed.
corr_mat <- cor(
  select(
    BTI_clean_panel,
    status_index,
    democracy_status,
    governance_index,
    governance_perf,
    stateness,
    political_participation,
    rule_of_law,
    democratic_stability,
    political_integration,
    steering_capability,
    resource_efficiency,
    consensus_building,
    international_cooperation
  ),
  use = "pairwise.complete.obs"
)

# Visualise the BTI correlation matrix with corrplot's default settings.
corrplot(corr_mat)

# List the columns available in the merged data set.
colnames(merged)
##  [1] "country_name"              "region"                   
##  [3] "year"                      "status_index"             
##  [5] "democracy_status"          "economy_status"           
##  [7] "governance_index"          "governance_perf"          
##  [9] "stateness"                 "political_participation"  
## [11] "rule_of_law"               "democratic_stability"     
## [13] "political_integration"     "civil_society"            
## [15] "conflict_intensity"        "steering_capability"      
## [17] "resource_efficiency"       "corruption_policy"        
## [19] "consensus_building"        "conflict_management"      
## [21] "international_cooperation" "credibility"              
## [23] "primary_enrolment"         "secondary_enrolment"      
## [25] "tertiary_enrolment"        "Tscore1519_ML"
library(ggplot2)

# Democratic stability against test score, with a least-squares line and
# its confidence band.
ggplot(
  data = merged_clean,
  mapping = aes(x = Tscore1519_ML, y = democratic_stability)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Democratic stability against primary enrolment, with a least-squares line
# and its confidence band.
ggplot(
  data = merged_clean,
  mapping = aes(x = primary_enrolment, y = democratic_stability)
) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Democratic stability against secondary enrolment, with a least-squares
# line and its confidence band. Uses theme_minimal() so it matches the
# test-score and primary-enrolment scatterplots in this file.
ggplot(merged_clean, aes(x = secondary_enrolment, y = democratic_stability)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Tscore1519_ML (education quality) shows a modest positive relationship with democratic stability. Countries with higher learning outcomes tend to have more stable democratic institutions, although the relationship is noisy and far from perfect.

primary_enrolment shows little to no meaningful relationship with democratic stability. The fitted line is nearly flat, suggesting that simply getting children into primary school does not strongly distinguish politically stable countries from unstable ones.

secondary_enrolment shows a clearer positive relationship with democratic stability than primary enrolment does. This suggests that broader access to education at more advanced levels may matter more for institutional stability than basic schooling alone.

# Bivariate OLS: democratic stability on test score.
summary(
  lm(democratic_stability ~ Tscore1519_ML, data = merged_clean)
)
## 
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML, data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.8894 -3.1938  0.8875  2.3354  3.9614 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.335793   1.030815   2.266 0.023993 *  
## Tscore1519_ML 0.009094   0.002332   3.900 0.000113 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.866 on 396 degrees of freedom
## Multiple R-squared:  0.03699,    Adjusted R-squared:  0.03455 
## F-statistic: 15.21 on 1 and 396 DF,  p-value: 0.000113
# Bivariate OLS: democratic stability on primary enrolment.
summary(
  lm(democratic_stability ~ primary_enrolment, data = merged_clean)
)
## 
## Call:
## lm(formula = democratic_stability ~ primary_enrolment, data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.4602 -3.3828  0.7667  2.2672  3.8075 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        3.79000    2.23951   1.692   0.0914 .
## primary_enrolment  0.02474    0.02188   1.131   0.2589  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.916 on 396 degrees of freedom
## Multiple R-squared:  0.003218,   Adjusted R-squared:  0.0007004 
## F-statistic: 1.278 on 1 and 396 DF,  p-value: 0.2589
# Bivariate OLS: democratic stability on secondary enrolment.
summary(
  lm(democratic_stability ~ secondary_enrolment, data = merged_clean)
)
## 
## Call:
## lm(formula = democratic_stability ~ secondary_enrolment, data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3656 -2.9333  0.9068  2.4296  3.9117 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.180374   0.928807   3.424 0.000681 ***
## secondary_enrolment 0.033631   0.009839   3.418 0.000696 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.879 on 396 degrees of freedom
## Multiple R-squared:  0.02866,    Adjusted R-squared:  0.02621 
## F-statistic: 11.68 on 1 and 396 DF,  p-value: 0.000696
# Bivariate OLS: democratic stability on tertiary enrolment.
summary(
  lm(democratic_stability ~ tertiary_enrolment, data = merged_clean)
)
## 
## Call:
## lm(formula = democratic_stability ~ tertiary_enrolment, data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.4278 -2.4384  0.7459  2.2210  4.5559 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        3.979827   0.343364  11.591  < 2e-16 ***
## tertiary_enrolment 0.046107   0.006211   7.424 7.04e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.737 on 396 degrees of freedom
## Multiple R-squared:  0.1222, Adjusted R-squared:  0.1199 
## F-statistic: 55.11 on 1 and 396 DF,  p-value: 7.042e-13
# OLS: democratic stability on test score and primary enrolment jointly.
summary(
  lm(
    democratic_stability ~ Tscore1519_ML + primary_enrolment,
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + primary_enrolment, 
##     data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.0818 -3.1768  0.8757  2.2880  4.2772 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -3.710911   2.768658  -1.340   0.1809    
## Tscore1519_ML      0.010686   0.002416   4.424 1.25e-05 ***
## primary_enrolment  0.052384   0.022281   2.351   0.0192 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.85 on 395 degrees of freedom
## Multiple R-squared:  0.05028,    Adjusted R-squared:  0.04547 
## F-statistic: 10.46 on 2 and 395 DF,  p-value: 3.763e-05
# Same specification with region entered as a factor, i.e. one dummy per
# region code relative to the first level.
summary(
  lm(
    democratic_stability ~ Tscore1519_ML + primary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + primary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0071 -1.0954  0.0300  0.9233  4.0327 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.235390   1.746190   0.135  0.89284    
## Tscore1519_ML      0.010693   0.001791   5.972 5.29e-09 ***
## primary_enrolment  0.028824   0.014113   2.042  0.04178 *  
## factor(region)2    0.188710   0.286105   0.660  0.50991    
## factor(region)3    2.214980   0.677807   3.268  0.00118 ** 
## factor(region)4   -5.682009   0.301286 -18.859  < 2e-16 ***
## factor(region)5    1.434986   0.473849   3.028  0.00262 ** 
## factor(region)6   -3.803029   0.258883 -14.690  < 2e-16 ***
## factor(region)7   -3.656660   0.307157 -11.905  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.634 on 389 degrees of freedom
## Multiple R-squared:  0.6926, Adjusted R-squared:  0.6863 
## F-statistic: 109.6 on 8 and 389 DF,  p-value: < 2.2e-16
# Region-dummy specification with the BTI status index as the outcome.
summary(
  lm(
    status_index ~ Tscore1519_ML + primary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = status_index ~ Tscore1519_ML + primary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.01369 -0.51117  0.00717  0.52850  1.89550 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.2700522  0.8153979   1.558    0.120    
## Tscore1519_ML      0.0133611  0.0008362  15.979  < 2e-16 ***
## primary_enrolment  0.0039680  0.0065900   0.602    0.547    
## factor(region)2    0.1828563  0.1335990   1.369    0.172    
## factor(region)3    1.2793452  0.3165076   4.042 6.39e-05 ***
## factor(region)4   -2.1396528  0.1406878 -15.209  < 2e-16 ***
## factor(region)5    1.1311970  0.2212679   5.112 5.00e-07 ***
## factor(region)6   -2.3378808  0.1208876 -19.339  < 2e-16 ***
## factor(region)7   -1.4549776  0.1434295 -10.144  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.763 on 389 degrees of freedom
## Multiple R-squared:  0.7598, Adjusted R-squared:  0.7549 
## F-statistic: 153.8 on 8 and 389 DF,  p-value: < 2.2e-16
# Region-dummy specification with economy status as the outcome.
summary(
  lm(
    economy_status ~ Tscore1519_ML + primary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = economy_status ~ Tscore1519_ML + primary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.62058 -0.49463  0.03983  0.40384  2.65743 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.8137622  0.7035872   1.157  0.24815    
## Tscore1519_ML      0.0156699  0.0007215  21.719  < 2e-16 ***
## primary_enrolment -0.0044118  0.0056863  -0.776  0.43830    
## factor(region)2    0.2531379  0.1152793   2.196  0.02869 *  
## factor(region)3    0.8545999  0.2731068   3.129  0.00189 ** 
## factor(region)4   -0.5177732  0.1213961  -4.265 2.51e-05 ***
## factor(region)5    1.1103272  0.1909267   5.815 1.26e-08 ***
## factor(region)6   -1.8653228  0.1043110 -17.882  < 2e-16 ***
## factor(region)7   -0.5570562  0.1237618  -4.501 8.95e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6583 on 389 degrees of freedom
## Multiple R-squared:  0.7493, Adjusted R-squared:  0.7442 
## F-statistic: 145.3 on 8 and 389 DF,  p-value: < 2.2e-16
# List the columns available in the analysis data set.
colnames(merged_clean)
##  [1] "country_name"              "region"                   
##  [3] "year"                      "status_index"             
##  [5] "democracy_status"          "economy_status"           
##  [7] "governance_index"          "governance_perf"          
##  [9] "stateness"                 "political_participation"  
## [11] "rule_of_law"               "democratic_stability"     
## [13] "political_integration"     "civil_society"            
## [15] "conflict_intensity"        "steering_capability"      
## [17] "resource_efficiency"       "corruption_policy"        
## [19] "consensus_building"        "conflict_management"      
## [21] "international_cooperation" "credibility"              
## [23] "primary_enrolment"         "secondary_enrolment"      
## [25] "tertiary_enrolment"        "Tscore1519_ML"
# Region-dummy specification with democracy status as the outcome.
summary(
  lm(
    democracy_status ~ Tscore1519_ML + primary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = democracy_status ~ Tscore1519_ML + primary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.65966 -0.65361  0.03519  0.64271  2.48232 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.726342   1.126523   1.532 0.126224    
## Tscore1519_ML      0.011052   0.001155   9.568  < 2e-16 ***
## primary_enrolment  0.012348   0.009104   1.356 0.175808    
## factor(region)2    0.112575   0.184575   0.610 0.542276    
## factor(region)3    1.704091   0.437275   3.897 0.000115 ***
## factor(region)4   -3.761532   0.194369 -19.353  < 2e-16 ***
## factor(region)5    1.152067   0.305695   3.769 0.000190 ***
## factor(region)6   -2.810439   0.167014 -16.828  < 2e-16 ***
## factor(region)7   -2.352899   0.198157 -11.874  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.054 on 389 degrees of freedom
## Multiple R-squared:  0.7322, Adjusted R-squared:  0.7266 
## F-statistic: 132.9 on 8 and 389 DF,  p-value: < 2.2e-16
# Region-dummy specification with the governance index as the outcome.
summary(
  lm(
    governance_index ~ Tscore1519_ML + primary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = governance_index ~ Tscore1519_ML + primary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7747 -0.4593  0.1143  0.5311  2.1312 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.055674   0.880603  -0.063 0.949622    
## Tscore1519_ML      0.009894   0.000903  10.957  < 2e-16 ***
## primary_enrolment  0.014897   0.007117   2.093 0.036977 *  
## factor(region)2    0.546552   0.144283   3.788 0.000176 ***
## factor(region)3    2.055019   0.341818   6.012 4.22e-09 ***
## factor(region)4   -0.988283   0.151938  -6.505 2.40e-10 ***
## factor(region)5    1.577307   0.238962   6.601 1.34e-10 ***
## factor(region)6   -1.463735   0.130555 -11.212  < 2e-16 ***
## factor(region)7   -0.667662   0.154899  -4.310 2.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.824 on 389 degrees of freedom
## Multiple R-squared:  0.553,  Adjusted R-squared:  0.5438 
## F-statistic: 60.15 on 8 and 389 DF,  p-value: < 2.2e-16
# Democratic stability on test score and secondary enrolment, with region
# dummies.
summary(
  lm(
    democratic_stability ~ Tscore1519_ML + secondary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + secondary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8902 -1.0917  0.0054  0.9926  3.9372 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.971499   0.899089   3.305  0.00104 ** 
## Tscore1519_ML        0.008362   0.002084   4.012 7.22e-05 ***
## secondary_enrolment  0.012732   0.007362   1.729  0.08455 .  
## factor(region)2      0.341917   0.270992   1.262  0.20781    
## factor(region)3      2.252358   0.681742   3.304  0.00104 ** 
## factor(region)4     -5.661398   0.301149 -18.799  < 2e-16 ***
## factor(region)5      1.303553   0.482078   2.704  0.00715 ** 
## factor(region)6     -3.854141   0.260095 -14.818  < 2e-16 ***
## factor(region)7     -3.501848   0.310148 -11.291  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.636 on 389 degrees of freedom
## Multiple R-squared:  0.6917, Adjusted R-squared:  0.6853 
## F-statistic: 109.1 on 8 and 389 DF,  p-value: < 2.2e-16
# List the distinct country names, in order of first appearance.
unique(merged_clean$country_name)
##  [1] "Albania"                "Algeria"                "Argentina"             
##  [4] "Armenia"                "Bahrain"                "Botswana"              
##  [7] "Bulgaria"               "Cambodia"               "Chile"                 
## [10] "Colombia"               "Croatia"                "Czechia"               
## [13] "El Salvador"            "Estonia"                "Georgia"               
## [16] "Ghana"                  "Hungary"                "Indonesia"             
## [19] "Jordan"                 "Kazakhstan"             "Latvia"                
## [22] "Lithuania"              "Malaysia"               "Mauritius"             
## [25] "Mexico"                 "Mongolia"               "Morocco"               
## [28] "Panama"                 "Peru"                   "Philippines"           
## [31] "Poland"                 "Romania"                "Serbia"                
## [34] "Slovenia"               "Thailand"               "Tunisia"               
## [37] "Ukraine"                "Uruguay"                "Uzbekistan"            
## [40] "Belarus"                "Bosnia and Herzegovina" "Jamaica"               
## [43] "Montenegro"             "North Macedonia"        "Oman"                  
## [46] "Paraguay"               "Azerbaijan"             "Dominican Republic"    
## [49] "Guatemala"              "Qatar"                  "South Africa"          
## [52] "Brazil"                 "Costa Rica"             "Kuwait"                
## [55] "Moldova"                "Saudi Arabia"           "Singapore"             
## [58] "United Arab Emirates"
# Democratic stability on test score and tertiary enrolment, with region
# dummies.
summary(
  lm(
    democratic_stability ~ Tscore1519_ML + tertiary_enrolment + factor(region),
    data = merged_clean
  )
)
## 
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + tertiary_enrolment + 
##     factor(region), data = merged_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1896 -1.0340  0.0491  1.0067  4.3256 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         4.307482   0.909403   4.737 3.05e-06 ***
## Tscore1519_ML       0.005674   0.002183   2.599 0.009693 ** 
## tertiary_enrolment  0.019204   0.005441   3.530 0.000466 ***
## factor(region)2     0.269926   0.268623   1.005 0.315592    
## factor(region)3     2.262495   0.670302   3.375 0.000811 ***
## factor(region)4    -5.410262   0.302296 -17.897  < 2e-16 ***
## factor(region)5     1.617775   0.471250   3.433 0.000661 ***
## factor(region)6    -3.772812   0.256413 -14.714  < 2e-16 ***
## factor(region)7    -3.371352   0.308569 -10.926  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.617 on 389 degrees of freedom
## Multiple R-squared:  0.6989, Adjusted R-squared:  0.6927 
## F-statistic: 112.9 on 8 and 389 DF,  p-value: < 2.2e-16
# Democratic stability against test score, one panel per year, each with its
# own least-squares line in red.
ggplot(
  data = merged_clean,
  mapping = aes(x = Tscore1519_ML, y = democratic_stability)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  facet_wrap(~ year) +
  labs(
    title = "Democratic Stability vs Test Scores by Year",
    x = "Test Score",
    y = "Democratic Stability"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Democratic stability against test score, one panel per region code, each
# with its own least-squares line in red.
ggplot(
  data = merged_clean,
  mapping = aes(x = Tscore1519_ML, y = democratic_stability)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  facet_wrap(~ region) +
  labs(
    title = "Democratic Stability vs Test Scores by Region",
    x = "Test Score",
    y = "Democratic Stability"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'