library(readxl)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggrepel)
## Loading required package: ggplot2
Importing Political Stability
# Read one worksheet per BTI edition (every second year, 2006 through 2024).
# Worksheets are addressed by name as "BTI <year>".
years <- seq(from = 2006, to = 2024, by = 2)
bti_workbook <- "/Users/lilydorathy/Library/CloudStorage/OneDrive-UniversityofVirginia/2025-26/2026 Spring/STAT 4996 Capstone/Project/BTI_2006-2024_Scores.xlsx"
read_bti_sheet <- function(edition) {
  read_excel(bti_workbook, sheet = paste("BTI", edition))
}
BTI_list <- lapply(years, read_bti_sheet)
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `S | Status Index` -> `S | Status Index...4`
## • `SI | Democracy Status` -> `SI | Democracy Status...5`
## • `SII | Economy Status` -> `SII | Economy Status...29`
## • `G | Governance Index` -> `G | Governance Index...52`
## • `Q13 | Level of Difficulty` -> `Q13 | Level of Difficulty...53`
## • `GII | Governance Performance` -> `GII | Governance Performance...60`
## • `` -> `...79`
## • `` -> `...84`
## • `` -> `...88`
## • `` -> `...92`
## • `` -> `...93`
## • `` -> `...104`
## • `S | Status Index` -> `S | Status Index...106`
## • `Category` -> `Category...107`
## • `` -> `...108`
## • `SI | Democracy Status` -> `SI | Democracy Status...109`
## • `Category` -> `Category...110`
## • `` -> `...111`
## • `SII | Economy Status` -> `SII | Economy Status...112`
## • `Category` -> `Category...113`
## • `` -> `...114`
## • `G | Governance Index` -> `G | Governance Index...115`
## • `Category` -> `Category...116`
## • `` -> `...117`
## • `Q13 | Level of Difficulty` -> `Q13 | Level of Difficulty...118`
## • `Category` -> `Category...119`
## • `` -> `...120`
## • `GII | Governance Performance` -> `GII | Governance Performance...121`
## • `Category` -> `Category...122`
## • `` -> `...123`
# Label each raw sheet by its edition year ("BTI_2006", "BTI_2008", ...).
names(BTI_list) <- paste0("BTI_", years)
# Standardise one raw BTI sheet:
#   * the first column becomes `country_name`;
#   * a fixed set of indices and components is kept under snake_case names;
#   * every score column is converted to numeric, with "-" read as missing;
#   * the edition year is attached directly after `region`.
# The `...N` suffixes in some source names come from read_excel's name repair
# of duplicated headers, so they depend on the column positions in the sheet.
clean_bti_sheet <- function(sheet, edition) {
  picked <- sheet %>%
    rename(country_name = 1) %>%
    select(
      country_name,
      region = Region,
      # broad indices
      status_index = `S | Status Index...4`,
      democracy_status = `SI | Democracy Status...5`,
      economy_status = `SII | Economy Status...29`,
      governance_index = `G | Governance Index...52`,
      governance_perf = `GII | Governance Performance...60`,
      # political components
      stateness = `Q1 | Stateness`,
      political_participation = `Q2 | Political Participation`,
      rule_of_law = `Q3 | Rule of Law`,
      democratic_stability = `Q4 | Stability of Democratic Institutions`,
      political_integration = `Q5 | Political and Social Integration`,
      # governance components
      civil_society = `Q13.2 | Civil society traditions`,
      conflict_intensity = `Q13.3 | Conflict intensity`,
      steering_capability = `Q14 | Steering Capability`,
      resource_efficiency = `Q15 | Resource Efficiency`,
      corruption_policy = `Q15.3 | Anti-corruption policy`,
      consensus_building = `Q16 | Consensus-Building`,
      conflict_management = `Q16.3 | Cleavage / conflict management`,
      international_cooperation = `Q17 | International Cooperation`,
      credibility = `Q17.2 | Credibility`
    )

  picked %>%
    mutate(
      across(
        -c(country_name, region),
        function(score) readr::parse_number(as.character(score), na = "-")
      ),
      year = edition
    ) %>%
    relocate(year, .after = region)
}

# Clean every edition, then stack the editions into one country-year panel.
BTI_clean_list <- Map(clean_bti_sheet, BTI_list, years)
names(BTI_clean_list) <- paste0("BTI_", years)
BTI_clean_panel <- bind_rows(BTI_clean_list)
BTI_clean_panel
## # A tibble: 1,370 × 22
## country_name region year status_index democracy_status economy_status
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan 7 2006 3.02 2.97 3.07
## 2 Albania 1 2006 6.61 7.25 5.96
## 3 Algeria 4 2006 4.42 4.23 4.61
## 4 Angola 5 2006 3.41 3.38 3.43
## 5 Argentina 2 2006 7.21 7.85 6.57
## 6 Armenia 6 2006 5.81 5.2 6.43
## 7 Azerbaijan 6 2006 4.51 3.8 5.21
## 8 Bahrain 4 2006 6.21 4.92 7.5
## 9 Bangladesh 7 2006 6.01 6.55 5.46
## 10 Belarus 6 2006 4.47 3.97 4.96
## # ℹ 1,360 more rows
## # ℹ 16 more variables: governance_index <dbl>, governance_perf <dbl>,
## # stateness <dbl>, political_participation <dbl>, rule_of_law <dbl>,
## # democratic_stability <dbl>, political_integration <dbl>,
## # civil_society <dbl>, conflict_intensity <dbl>, steering_capability <dbl>,
## # resource_efficiency <dbl>, corruption_policy <dbl>,
## # consensus_building <dbl>, conflict_management <dbl>, …
#write_excel_csv(BTI_clean_panel, file = "BTI_clean_panel.csv")
Importing Education Quality
# Load the education-quality scores from a CSV in the working directory.
Lee_Lee_2025 <- read_csv("Lee_Lee_2025.csv")
## Rows: 5454 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CountryName, CountryCode
## dbl (8): Year, Observed_data_flag, Tscore_INT, Tscore_ML, Tscore1519_INT, Ts...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Education quality: keep only the country-year key and the Tscore1519_ML
# score for 2006 onward, with the key columns named as in the BTI panel.
quality_clean <- Lee_Lee_2025 %>%
  filter(Year >= 2006) %>%
  select(
    country_name = CountryName,
    year = Year,
    Tscore1519_ML
  )
Importing Education Quantity
# Load the enrolment/completion-rate data from a CSV in the working directory.
quantity <- read_csv("primary-secondary-enrollment-completion-rates.csv")
## Rows: 10628 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (6): Year, Primary enrolment, Secondary enrolment, Tertiary enrolment, P...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Education quantity: snake_case the column names, keep 2006 onward, and retain
# the country-year key plus the three gross enrolment series.
# The year filter is applied once; the original repeated it after select(),
# which had no effect.
quantity_clean <- quantity %>%
  clean_names() %>%
  filter(year >= 2006) %>%
  select(
    country_name = entity,
    year,
    primary_enrolment,
    secondary_enrolment,
    tertiary_enrolment
  )

# List the entity names available for matching. The list contains aggregates
# (e.g. "World", "High-income countries") alongside individual countries.
unique(quantity_clean$country_name)
## [1] "Afghanistan"
## [2] "Albania"
## [3] "Algeria"
## [4] "Andorra"
## [5] "Angola"
## [6] "Anguilla"
## [7] "Antigua and Barbuda"
## [8] "Argentina"
## [9] "Armenia"
## [10] "Aruba"
## [11] "Australia"
## [12] "Austria"
## [13] "Azerbaijan"
## [14] "Bahamas"
## [15] "Bahrain"
## [16] "Bangladesh"
## [17] "Barbados"
## [18] "Belarus"
## [19] "Belgium"
## [20] "Belize"
## [21] "Benin"
## [22] "Bermuda"
## [23] "Bhutan"
## [24] "Bolivia"
## [25] "Bosnia and Herzegovina"
## [26] "Botswana"
## [27] "Brazil"
## [28] "British Virgin Islands"
## [29] "Brunei"
## [30] "Bulgaria"
## [31] "Burkina Faso"
## [32] "Burundi"
## [33] "Cambodia"
## [34] "Cameroon"
## [35] "Canada"
## [36] "Cape Verde"
## [37] "Cayman Islands"
## [38] "Central African Republic"
## [39] "Central and Southern Asia (SDG)"
## [40] "Chad"
## [41] "Chile"
## [42] "China"
## [43] "Colombia"
## [44] "Comoros"
## [45] "Congo"
## [46] "Cook Islands"
## [47] "Costa Rica"
## [48] "Cote d'Ivoire"
## [49] "Croatia"
## [50] "Cuba"
## [51] "Curacao"
## [52] "Cyprus"
## [53] "Czechia"
## [54] "Democratic Republic of Congo"
## [55] "Denmark"
## [56] "Djibouti"
## [57] "Dominica"
## [58] "Dominican Republic"
## [59] "East Asia and Pacific (WB)"
## [60] "East Timor"
## [61] "Eastern and South-Eastern Asia (SDG)"
## [62] "Ecuador"
## [63] "Egypt"
## [64] "El Salvador"
## [65] "Equatorial Guinea"
## [66] "Eritrea"
## [67] "Estonia"
## [68] "Eswatini"
## [69] "Ethiopia"
## [70] "Europe and Central Asia (WB)"
## [71] "Europe and Northern America (SDG)"
## [72] "European Union (27)"
## [73] "Fiji"
## [74] "Finland"
## [75] "France"
## [76] "Gabon"
## [77] "Gambia"
## [78] "Georgia"
## [79] "Germany"
## [80] "Ghana"
## [81] "Gibraltar"
## [82] "Greece"
## [83] "Grenada"
## [84] "Guatemala"
## [85] "Guinea"
## [86] "Guinea-Bissau"
## [87] "Guyana"
## [88] "Haiti"
## [89] "High-income countries"
## [90] "Honduras"
## [91] "Hong Kong"
## [92] "Hungary"
## [93] "Iceland"
## [94] "India"
## [95] "Indonesia"
## [96] "Iran"
## [97] "Iraq"
## [98] "Ireland"
## [99] "Israel"
## [100] "Italy"
## [101] "Jamaica"
## [102] "Japan"
## [103] "Jordan"
## [104] "Kazakhstan"
## [105] "Kenya"
## [106] "Kiribati"
## [107] "Kuwait"
## [108] "Kyrgyzstan"
## [109] "Laos"
## [110] "Latin America and Caribbean (WB)"
## [111] "Latin America and the Caribbean (SDG)"
## [112] "Latvia"
## [113] "Lebanon"
## [114] "Lesotho"
## [115] "Liberia"
## [116] "Libya"
## [117] "Liechtenstein"
## [118] "Lithuania"
## [119] "Low-income countries"
## [120] "Lower-middle-income countries"
## [121] "Luxembourg"
## [122] "Macao"
## [123] "Madagascar"
## [124] "Malawi"
## [125] "Malaysia"
## [126] "Maldives"
## [127] "Mali"
## [128] "Malta"
## [129] "Marshall Islands"
## [130] "Mauritania"
## [131] "Mauritius"
## [132] "Mexico"
## [133] "Micronesia (country)"
## [134] "Middle East and North Africa (WB)"
## [135] "Middle-income countries"
## [136] "Moldova"
## [137] "Monaco"
## [138] "Mongolia"
## [139] "Montenegro"
## [140] "Montserrat"
## [141] "Morocco"
## [142] "Mozambique"
## [143] "Myanmar"
## [144] "Namibia"
## [145] "Nauru"
## [146] "Nepal"
## [147] "Netherlands"
## [148] "New Caledonia"
## [149] "New Zealand"
## [150] "Nicaragua"
## [151] "Niger"
## [152] "Nigeria"
## [153] "Niue"
## [154] "North America (WB)"
## [155] "North Korea"
## [156] "North Macedonia"
## [157] "Northern Africa and Western Asia (SDG)"
## [158] "Norway"
## [159] "Oceania (excluding Australia and New Zealand) (SDG)"
## [160] "Oman"
## [161] "Pakistan"
## [162] "Palau"
## [163] "Palestine"
## [164] "Panama"
## [165] "Papua New Guinea"
## [166] "Paraguay"
## [167] "Peru"
## [168] "Philippines"
## [169] "Poland"
## [170] "Portugal"
## [171] "Puerto Rico"
## [172] "Qatar"
## [173] "Romania"
## [174] "Russia"
## [175] "Rwanda"
## [176] "Saint Kitts and Nevis"
## [177] "Saint Lucia"
## [178] "Saint Vincent and the Grenadines"
## [179] "Samoa"
## [180] "San Marino"
## [181] "Sao Tome and Principe"
## [182] "Saudi Arabia"
## [183] "Senegal"
## [184] "Serbia"
## [185] "Seychelles"
## [186] "Sierra Leone"
## [187] "Singapore"
## [188] "Sint Maarten (Dutch part)"
## [189] "Slovakia"
## [190] "Slovenia"
## [191] "Solomon Islands"
## [192] "Somalia"
## [193] "South Africa"
## [194] "South Asia (WB)"
## [195] "South Korea"
## [196] "South Sudan"
## [197] "Spain"
## [198] "Sri Lanka"
## [199] "Sub-Saharan Africa (SDG)"
## [200] "Sub-Saharan Africa (WB)"
## [201] "Sudan"
## [202] "Suriname"
## [203] "Sweden"
## [204] "Switzerland"
## [205] "Syria"
## [206] "Tajikistan"
## [207] "Tanzania"
## [208] "Thailand"
## [209] "Togo"
## [210] "Tokelau"
## [211] "Tonga"
## [212] "Trinidad and Tobago"
## [213] "Tunisia"
## [214] "Turkey"
## [215] "Turkmenistan"
## [216] "Turks and Caicos Islands"
## [217] "Tuvalu"
## [218] "Uganda"
## [219] "Ukraine"
## [220] "United Arab Emirates"
## [221] "United Kingdom"
## [222] "United States"
## [223] "Upper-middle-income countries"
## [224] "Uruguay"
## [225] "Uzbekistan"
## [226] "Vanuatu"
## [227] "Venezuela"
## [228] "Vietnam"
## [229] "World"
## [230] "Yemen"
## [231] "Zambia"
## [232] "Zimbabwe"
Merging
# Attach the enrolment and education-quality columns to every BTI country-year.
# Left joins keep all BTI rows; unmatched rows get NA in the added columns.
# NOTE(review): matching is on exact country_name strings, so any spelling
# difference between sources silently produces NA. Verify with anti_join().
join_keys <- c("country_name", "year")
bti_with_quantity <- left_join(BTI_clean_panel, quantity_clean, by = join_keys)
merged <- left_join(bti_with_quantity, quality_clean, by = join_keys)
colnames(merged)
## [1] "country_name" "region"
## [3] "year" "status_index"
## [5] "democracy_status" "economy_status"
## [7] "governance_index" "governance_perf"
## [9] "stateness" "political_participation"
## [11] "rule_of_law" "democratic_stability"
## [13] "political_integration" "civil_society"
## [15] "conflict_intensity" "steering_capability"
## [17] "resource_efficiency" "corruption_policy"
## [19] "consensus_building" "conflict_management"
## [21] "international_cooperation" "credibility"
## [23] "primary_enrolment" "secondary_enrolment"
## [25] "tertiary_enrolment" "Tscore1519_ML"
# Save the merged panel (before any NA filtering) to the working directory.
write_excel_csv(merged, file = "merged.csv")
Checks
# Sanity check: list any country-year key that occurs more than once after the
# joins. An empty result means the joins did not duplicate rows.
rows_per_key <- count(merged, country_name, year)
filter(rows_per_key, n > 1)
## # A tibble: 0 × 3
## # ℹ 3 variables: country_name <chr>, year <dbl>, n <int>
# Confirm the span of years present in the merged panel.
range(merged$year)
## [1] 2006 2024
# Count missing values per column before filtering.
colSums(is.na(merged))
## country_name region year
## 0 0 0
## status_index democracy_status economy_status
## 72 72 72
## governance_index governance_perf stateness
## 73 73 72
## political_participation rule_of_law democratic_stability
## 72 72 72
## political_integration civil_society conflict_intensity
## 72 72 72
## steering_capability resource_efficiency corruption_policy
## 73 73 73
## consensus_building conflict_management international_cooperation
## 73 73 73
## credibility primary_enrolment secondary_enrolment
## 73 390 557
## tertiary_enrolment Tscore1519_ML
## 533 821
# Analysis sample: keep only country-years where both headline BTI indices,
# the education-quality score, and all three enrolment rates are observed.
required_cols <- c(
  "status_index", "governance_index", "Tscore1519_ML",
  "primary_enrolment", "secondary_enrolment", "tertiary_enrolment"
)
merged_clean <- drop_na(merged, all_of(required_cols))

# Re-count missing values to confirm what remains after filtering.
colSums(is.na(merged_clean))
## country_name region year
## 0 0 0
## status_index democracy_status economy_status
## 0 0 0
## governance_index governance_perf stateness
## 0 0 0
## political_participation rule_of_law democratic_stability
## 0 0 0
## political_integration civil_society conflict_intensity
## 0 0 0
## steering_capability resource_efficiency corruption_policy
## 0 0 0
## consensus_building conflict_management international_cooperation
## 0 0 0
## credibility primary_enrolment secondary_enrolment
## 0 0 0
## tertiary_enrolment Tscore1519_ML
## 0 0
# Show the dimensions and column types of the analysis sample.
glimpse(merged_clean)
## Rows: 398
## Columns: 26
## $ country_name <chr> "Albania", "Algeria", "Argentina", "Armenia"…
## $ region <dbl> 1, 4, 2, 6, 4, 5, 1, 7, 2, 2, 1, 1, 2, 1, 6,…
## $ year <dbl> 2006, 2006, 2006, 2006, 2006, 2006, 2006, 20…
## $ status_index <dbl> 6.607143, 4.420238, 7.210714, 5.814286, 6.20…
## $ democracy_status <dbl> 7.250000, 4.233333, 7.850000, 5.200000, 4.91…
## $ economy_status <dbl> 5.964286, 4.607143, 6.571429, 6.428571, 7.50…
## $ governance_index <dbl> 5.331435, 3.648966, 5.514074, 5.138765, 4.85…
## $ governance_perf <dbl> 6.016667, 3.966667, 6.400000, 5.866667, 5.66…
## $ stateness <dbl> 8.00, 7.00, 8.75, 8.75, 8.25, 9.00, 9.25, 7.…
## $ political_participation <dbl> 8.00, 4.25, 9.50, 5.00, 4.00, 9.00, 9.25, 4.…
## $ rule_of_law <dbl> 5.50, 4.25, 6.00, 4.25, 5.00, 8.00, 7.75, 3.…
## $ democratic_stability <dbl> 7.5, 2.0, 8.0, 3.0, 2.0, 9.0, 9.0, 2.0, 9.5,…
## $ political_integration <dbl> 7.250000, 3.666667, 7.000000, 5.000000, 5.33…
## $ civil_society <dbl> 7, 7, 4, 4, 5, 4, 4, 10, 4, 5, 4, 2, 5, 2, 7…
## $ conflict_intensity <dbl> 2, 7, 4, 3, 5, 1, 2, 5, 3, 9, 2, 1, 3, 3, 6,…
## $ steering_capability <dbl> 5.666667, 4.333333, 6.000000, 5.666667, 5.00…
## $ resource_efficiency <dbl> 4.333333, 2.666667, 5.333333, 5.000000, 6.00…
## $ corruption_policy <dbl> 4, 2, 5, 5, 6, 8, 6, 2, 9, 5, 7, 7, 5, 8, 7,…
## $ consensus_building <dbl> 6.40, 4.20, 7.60, 5.80, 5.00, 8.50, 8.75, 3.…
## $ conflict_management <dbl> 5, 6, 7, 6, 7, 8, 8, 5, 9, 5, 8, 9, 7, 8, 7,…
## $ international_cooperation <dbl> 7.666667, 4.666667, 6.666667, 7.000000, 6.66…
## $ credibility <dbl> 6, 5, 5, 8, 7, 9, 8, 6, 10, 8, 9, 10, 10, 10…
## $ primary_enrolment <dbl> 105.47778, 112.07709, 116.18785, 103.89670, …
## $ secondary_enrolment <dbl> 84.32577, 83.48119, 92.40758, 95.26765, 83.5…
## $ tertiary_enrolment <dbl> 26.21010, 21.34580, 66.94447, 41.59036, 17.5…
## $ Tscore1519_ML <dbl> 414.6, 398.2, 415.0, 476.7, 424.3, 363.7, 47…
# Mean education-quality score per BTI edition year, computed once and used
# for both the line chart and the printed table.
yearly_quality <- summarise(
  group_by(merged_clean, year),
  mean_quality = mean(Tscore1519_ML)
)

ggplot(yearly_quality, aes(x = year, y = mean_quality)) +
  geom_line(linewidth = 1) +
  labs(
    title = "Average Education Quality Over Time",
    y = "Tscore1519_ML"
  ) +
  theme_minimal()

yearly_quality
## # A tibble: 9 × 2
## year mean_quality
## <dbl> <dbl>
## 1 2006 438.
## 2 2008 436.
## 3 2010 435.
## 4 2012 438.
## 5 2014 437.
## 6 2016 441.
## 7 2018 442.
## 8 2020 437.
## 9 2022 435.
# Five-number summary (plus mean) of education quality in the 2010 edition.
# Naming the column inside summary() on a data frame does not select it; the
# original call printed a summary of every column. Extract the vector first.
merged_clean %>%
  filter(year == 2010) %>%
  pull(Tscore1519_ML) %>%
  summary()
## country_name region year status_index
## Length:43 Min. :1.000 Min. :2010 Min. :3.254
## Class :character 1st Qu.:1.000 1st Qu.:2010 1st Qu.:5.950
## Mode :character Median :2.000 Median :2010 Median :7.139
## Mean :3.023 Mean :2010 Mean :6.970
## 3rd Qu.:5.000 3rd Qu.:2010 3rd Qu.:8.268
## Max. :7.000 Max. :2010 Max. :9.650
## democracy_status economy_status governance_index governance_perf
## Min. :2.900 Min. :3.607 Min. :2.098 Min. :2.300
## 1st Qu.:5.317 1st Qu.:6.143 1st Qu.:4.827 1st Qu.:5.542
## Median :7.550 Median :6.786 Median :5.891 Median :6.717
## Mean :7.007 Mean :6.933 Mean :5.680 Mean :6.622
## 3rd Qu.:8.500 3rd Qu.:7.964 3rd Qu.:6.534 3rd Qu.:7.942
## Max. :9.900 Max. :9.500 Max. :7.558 Max. :9.233
## stateness political_participation rule_of_law democratic_stability
## Min. : 6.750 Min. : 2.000 Min. : 2.250 Min. : 1.000
## 1st Qu.: 8.000 1st Qu.: 4.875 1st Qu.: 5.250 1st Qu.: 3.000
## Median : 8.750 Median : 8.250 Median : 6.250 Median : 7.500
## Mean : 8.529 Mean : 7.267 Mean : 6.483 Mean : 6.523
## 3rd Qu.: 9.250 3rd Qu.: 9.375 3rd Qu.: 8.250 3rd Qu.: 8.500
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## political_integration civil_society conflict_intensity steering_capability
## Min. :2.000 Min. :1.000 Min. :1.000 Min. :2.000
## 1st Qu.:5.000 1st Qu.:4.000 1st Qu.:2.000 1st Qu.:5.167
## Median :6.750 Median :5.000 Median :3.000 Median :6.333
## Mean :6.231 Mean :4.791 Mean :3.512 Mean :6.256
## 3rd Qu.:7.500 3rd Qu.:6.000 3rd Qu.:4.500 3rd Qu.:7.500
## Max. :9.500 Max. :9.000 Max. :8.000 Max. :9.333
## resource_efficiency corruption_policy consensus_building conflict_management
## Min. :2.333 Min. :2.000 Min. :2.200 Min. :2.000
## 1st Qu.:4.667 1st Qu.:4.000 1st Qu.:4.900 1st Qu.:5.500
## Median :5.667 Median :5.000 Median :7.200 Median :7.000
## Mean :5.791 Mean :5.256 Mean :6.613 Mean :6.279
## 3rd Qu.:6.667 3rd Qu.:6.500 3rd Qu.:8.000 3rd Qu.:7.000
## Max. :8.667 Max. :9.000 Max. :9.600 Max. :9.000
## international_cooperation credibility primary_enrolment
## Min. : 2.667 Min. : 2.000 Min. : 88.71
## 1st Qu.: 7.000 1st Qu.: 6.500 1st Qu.: 98.64
## Median : 8.333 Median : 8.000 Median :100.99
## Mean : 7.829 Mean : 7.721 Mean :103.46
## 3rd Qu.: 8.667 3rd Qu.: 9.000 3rd Qu.:109.12
## Max. :10.000 Max. :10.000 Max. :124.38
## secondary_enrolment tertiary_enrolment Tscore1519_ML
## Min. : 52.19 Min. : 8.857 Min. :271.0
## 1st Qu.: 85.49 1st Qu.:31.907 1st Qu.:401.4
## Median : 92.61 Median :46.723 Median :444.9
## Mean : 90.88 Mean :47.067 Mean :435.1
## 3rd Qu.: 99.03 3rd Qu.:62.841 3rd Qu.:476.2
## Max. :111.03 Max. :90.251 Max. :540.8
# Collapse the panel to one row per country: average status index, governance
# index and education quality, plus the number of editions contributing.
by_country <- group_by(merged_clean, country_name)
country_avg <- summarise(
  by_country,
  avg_status_index = mean(status_index, na.rm = TRUE),
  avg_governance_index = mean(governance_index, na.rm = TRUE),
  avg_quality = mean(Tscore1519_ML, na.rm = TRUE),
  n_years = n(),
  .groups = "drop"
)
# Country-level scatter of average governance index against average status
# index, coloured by average education quality, with a single linear fit and
# repelled country labels.
governance_status_plot <- ggplot(
  data = country_avg,
  mapping = aes(
    x = avg_governance_index,
    y = avg_status_index,
    color = avg_quality
  )
) +
  geom_point(alpha = 0.8, size = 3) +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_text_repel(aes(label = country_name), max.overlaps = 10, size = 3) +
  labs(
    title = "Average Governance vs Status Index (Colored by Education Quality)",
    x = "Average BTI Governance Index",
    y = "Average BTI Status Index",
    color = "Avg Education Quality\n(Tscore1519_ML)"
  ) +
  scale_color_viridis_c() +
  theme_minimal()
governance_status_plot
## `geom_smooth()` using formula = 'y ~ x'
# Pooled OLS of education quality on the BTI status and governance indices,
# edition year, and primary enrolment, using every country-year in merged_clean.
# NOTE(review): rows are repeated observations of the same countries, which
# lm() treats as independent; confirm whether clustered errors are needed.
# NOTE(review): status_index and governance_index may be strongly correlated;
# check collinearity before interpreting their opposite-signed coefficients.
education <- lm(
Tscore1519_ML ~
status_index +
governance_index +
year +
primary_enrolment,
data = merged_clean
)
summary(education)
##
## Call:
## lm(formula = Tscore1519_ML ~ status_index + governance_index +
## year + primary_enrolment, data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -165.216 -27.423 8.904 29.655 193.122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 321.7823 1065.6487 0.302 0.763
## status_index 35.3012 3.7036 9.532 < 2e-16 ***
## governance_index -24.4466 4.6552 -5.252 2.48e-07 ***
## year 0.1006 0.5219 0.193 0.847
## primary_enrolment -1.8752 0.4108 -4.565 6.70e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.83 on 393 degrees of freedom
## Multiple R-squared: 0.3278, Adjusted R-squared: 0.321
## F-statistic: 47.92 on 4 and 393 DF, p-value: < 2.2e-16
# Print the per-country averages that feed the clustering step.
country_avg
## # A tibble: 58 × 5
## country_name avg_status_index avg_governance_index avg_quality n_years
## <chr> <dbl> <dbl> <dbl> <int>
## 1 Albania 6.83 5.71 422. 9
## 2 Algeria 4.77 4.09 399. 3
## 3 Argentina 7.05 5.52 412. 9
## 4 Armenia 6.05 4.86 473. 7
## 5 Azerbaijan 4.54 3.95 442. 7
## 6 Bahrain 5.25 4.01 447. 6
## 7 Belarus 4.52 3.10 483. 8
## 8 Bosnia and Herzego… 6.30 4.11 438. 8
## 9 Botswana 7.96 7.36 363. 2
## 10 Brazil 7.49 6.21 414. 6
## # ℹ 48 more rows
# K-means clustering of countries on their average governance index, status
# index and education quality.
cluster_vars <- country_avg %>%
  select(
    avg_governance_index,
    avg_status_index,
    avg_quality
  )

# Standardise so each variable contributes on the same scale.
cluster_scaled <- scale(cluster_vars)

# Elbow plot: total within-cluster sum of squares for k = 1..10.
# vapply() guarantees a numeric vector of length 10, whatever kmeans returns.
set.seed(123)
wss <- vapply(
  1:10,
  function(k) kmeans(cluster_scaled, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)
plot(1:10, wss, type = "b",
     xlab = "Number of clusters (k)",
     ylab = "Within-cluster sum of squares")

# Final three-cluster solution.
# NOTE(review): this call is not seeded on its own; it is reproducible only
# when run straight after the seeded elbow loop above.
k3 <- kmeans(cluster_scaled, centers = 3, nstart = 50)
country_avg$cluster <- factor(k3$cluster)

# Cluster profiles: mean of each (unscaled) clustering variable per cluster.
aggregate(
  cluster_vars,
  by = list(cluster = country_avg$cluster),
  FUN = mean
)
## cluster avg_governance_index avg_status_index avg_quality
## 1 1 6.700441 8.582834 497.1411
## 2 2 4.071320 4.995917 416.6836
## 3 3 5.833652 6.741902 402.4422
# Scatter of average governance index against average education quality, with
# points coloured by the `cluster` column of country_avg.
cluster_plot <- country_avg %>%
  ggplot(aes(x = avg_governance_index, y = avg_quality, color = cluster)) +
  geom_point(alpha = 0.8, size = 3) +
  labs(
    title = "Country Clusters by Governance and Education Quality",
    x = "Average Governance Index",
    y = "Average Education Quality"
  ) +
  theme_minimal()
cluster_plot

# Number of country-year rows in the analysis sample.
nrow(merged_clean)
## [1] 398
# Number of distinct countries in the analysis sample.
n_distinct(merged_clean$country_name)
## [1] 58
# Per-column descriptive statistics for the full analysis sample.
summary(merged_clean)
## country_name region year status_index
## Length:398 Min. :1.000 Min. :2006 Min. :3.014
## Class :character 1st Qu.:1.000 1st Qu.:2010 1st Qu.:5.957
## Mode :character Median :2.000 Median :2014 Median :6.782
## Mean :3.251 Mean :2014 Mean :6.848
## 3rd Qu.:6.000 3rd Qu.:2018 3rd Qu.:8.057
## Max. :7.000 Max. :2022 Max. :9.650
## democracy_status economy_status governance_index governance_perf
## Min. :2.450 Min. :3.179 Min. :1.778 Min. :1.950
## 1st Qu.:5.200 1st Qu.:6.009 1st Qu.:4.780 1st Qu.:5.421
## Median :7.100 Median :6.714 Median :5.774 Median :6.638
## Mean :6.775 Mean :6.922 Mean :5.602 Mean :6.526
## 3rd Qu.:8.350 3rd Qu.:7.929 3rd Qu.:6.563 3rd Qu.:7.758
## Max. :9.950 Max. :9.786 Max. :7.694 Max. :9.400
## stateness political_participation rule_of_law democratic_stability
## Min. : 5.250 Min. : 1.250 Min. : 1.750 Min. : 1.000
## 1st Qu.: 7.500 1st Qu.: 4.500 1st Qu.: 4.750 1st Qu.: 3.000
## Median : 8.500 Median : 7.750 Median : 6.250 Median : 7.000
## Mean : 8.336 Mean : 6.876 Mean : 6.263 Mean : 6.317
## 3rd Qu.: 9.250 3rd Qu.: 9.000 3rd Qu.: 7.750 3rd Qu.: 8.500
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## political_integration civil_society conflict_intensity steering_capability
## Min. :2.000 Min. : 1.000 Min. :1.000 Min. :1.667
## 1st Qu.:5.000 1st Qu.: 4.000 1st Qu.:3.000 1st Qu.:5.000
## Median :6.500 Median : 5.000 Median :3.000 Median :6.333
## Mean :6.081 Mean : 4.912 Mean :3.693 Mean :6.241
## 3rd Qu.:7.250 3rd Qu.: 6.000 3rd Qu.:5.000 3rd Qu.:7.333
## Max. :9.750 Max. :10.000 Max. :9.000 Max. :9.667
## resource_efficiency corruption_policy consensus_building conflict_management
## Min. :2.000 Min. :1.000 Min. :1.800 Min. : 1.000
## 1st Qu.:4.667 1st Qu.:4.000 1st Qu.:4.800 1st Qu.: 5.000
## Median :5.667 Median :5.000 Median :6.600 Median : 7.000
## Mean :5.837 Mean :5.379 Mean :6.386 Mean : 6.219
## 3rd Qu.:7.000 3rd Qu.:7.000 3rd Qu.:8.200 3rd Qu.: 8.000
## Max. :9.333 Max. :9.000 Max. :9.800 Max. :10.000
## international_cooperation credibility primary_enrolment
## Min. : 2.333 Min. : 2.000 Min. : 82.21
## 1st Qu.: 6.667 1st Qu.: 6.000 1st Qu.: 98.23
## Median : 7.667 Median : 8.000 Median :101.01
## Mean : 7.639 Mean : 7.457 Mean :102.12
## 3rd Qu.: 9.000 3rd Qu.: 9.000 3rd Qu.:105.66
## Max. :10.000 Max. :10.000 Max. :128.40
## secondary_enrolment tertiary_enrolment Tscore1519_ML
## Min. : 39.14 Min. : 4.773 Min. :271.0
## 1st Qu.: 85.86 1st Qu.: 34.894 1st Qu.:406.1
## Median : 96.52 Median : 50.459 Median :440.9
## Mean : 93.25 Mean : 50.681 Mean :437.7
## 3rd Qu.:103.01 3rd Qu.: 67.731 3rd Qu.:476.2
## Max. :125.96 Max. :107.104 Max. :610.7
# Pick the ten countries with the most country-year rows. Which countries are
# chosen among those with equal counts follows the order returned by count().
obs_per_country <- count(merged_clean, country_name, sort = TRUE)
top_countries <- head(obs_per_country$country_name, 10)

# Education-quality trajectories for those ten countries.
top_country_rows <- filter(merged_clean, country_name %in% top_countries)
ggplot(
  top_country_rows,
  aes(x = year, y = Tscore1519_ML, group = country_name, color = country_name)
) +
  geom_line() +
  theme_minimal()

# Pooled scatter of governance index against education quality with OLS fit.
merged_clean %>%
  ggplot(aes(x = governance_index, y = Tscore1519_ML)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Histogram of every numeric column in the analysis sample, one facet per
# variable with independent axes.
numeric_cols <- select(merged_clean, where(is.numeric))
numeric_long <- pivot_longer(
  numeric_cols,
  cols = everything(),
  names_to = "variable",
  values_to = "value"
)
ggplot(numeric_long, aes(x = value)) +
  geom_histogram(bins = 20) +
  facet_wrap(vars(variable), scales = "free")
library(dplyr)
library(corrplot)
## corrplot 0.95 loaded
# Pairwise correlations among the BTI indices and components. This uses the
# full BTI panel (not the merged analysis sample); each pair is computed on
# the rows where both variables are present.
corr_vars <- c(
  "status_index",
  "democracy_status",
  "governance_index",
  "governance_perf",
  "stateness",
  "political_participation",
  "rule_of_law",
  "democratic_stability",
  "political_integration",
  "steering_capability",
  "resource_efficiency",
  "consensus_building",
  "international_cooperation"
)
bti_corr_data <- select(BTI_clean_panel, all_of(corr_vars))
corr_mat <- cor(bti_corr_data, use = "pairwise.complete.obs")
corrplot(corr_mat)

# Column names of the merged panel, for reference.
colnames(merged)
## [1] "country_name" "region"
## [3] "year" "status_index"
## [5] "democracy_status" "economy_status"
## [7] "governance_index" "governance_perf"
## [9] "stateness" "political_participation"
## [11] "rule_of_law" "democratic_stability"
## [13] "political_integration" "civil_society"
## [15] "conflict_intensity" "steering_capability"
## [17] "resource_efficiency" "corruption_policy"
## [19] "consensus_building" "conflict_management"
## [21] "international_cooperation" "credibility"
## [23] "primary_enrolment" "secondary_enrolment"
## [25] "tertiary_enrolment" "Tscore1519_ML"
library(ggplot2)
# Education quality against stability of democratic institutions, with an OLS
# line and its confidence band.
merged_clean %>%
  ggplot(aes(Tscore1519_ML, democratic_stability)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Primary enrolment against stability of democratic institutions, with an OLS
# line and its confidence band.
merged_clean %>%
  ggplot(aes(primary_enrolment, democratic_stability)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Secondary enrolment against stability of democratic institutions, with an
# OLS line and its confidence band. Point transparency and theme now match the
# education-quality and primary-enrolment plots it is compared with.
ggplot(merged_clean, aes(x = secondary_enrolment, y = democratic_stability)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Tscore1519_ML (education quality) shows a modest positive relationship
with democratic stability. Countries with higher learning outcomes tend
to have more stable democratic institutions, although the relationship
is noisy and far from perfect.
primary_enrolment shows little to no meaningful relationship with democratic stability. The fitted line is nearly flat, suggesting that simply getting children into primary school does not strongly distinguish politically stable countries from unstable ones.
secondary_enrolment shows a clearer positive relationship with democratic stability than primary enrolment. This suggests that broader access to education at more advanced levels may matter more for institutional stability than basic schooling alone.
# Bivariate pooled OLS: democratic stability on education quality.
summary(lm(democratic_stability ~ Tscore1519_ML, data = merged_clean))
##
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML, data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8894 -3.1938 0.8875 2.3354 3.9614
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.335793 1.030815 2.266 0.023993 *
## Tscore1519_ML 0.009094 0.002332 3.900 0.000113 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.866 on 396 degrees of freedom
## Multiple R-squared: 0.03699, Adjusted R-squared: 0.03455
## F-statistic: 15.21 on 1 and 396 DF, p-value: 0.000113
# Bivariate pooled OLS: democratic stability on primary enrolment.
summary(lm(democratic_stability ~ primary_enrolment, data = merged_clean))
##
## Call:
## lm(formula = democratic_stability ~ primary_enrolment, data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.4602 -3.3828 0.7667 2.2672 3.8075
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.79000 2.23951 1.692 0.0914 .
## primary_enrolment 0.02474 0.02188 1.131 0.2589
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.916 on 396 degrees of freedom
## Multiple R-squared: 0.003218, Adjusted R-squared: 0.0007004
## F-statistic: 1.278 on 1 and 396 DF, p-value: 0.2589
# Bivariate pooled OLS: democratic stability on secondary enrolment.
summary(lm(democratic_stability ~ secondary_enrolment, data = merged_clean))
##
## Call:
## lm(formula = democratic_stability ~ secondary_enrolment, data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.3656 -2.9333 0.9068 2.4296 3.9117
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.180374 0.928807 3.424 0.000681 ***
## secondary_enrolment 0.033631 0.009839 3.418 0.000696 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.879 on 396 degrees of freedom
## Multiple R-squared: 0.02866, Adjusted R-squared: 0.02621
## F-statistic: 11.68 on 1 and 396 DF, p-value: 0.000696
# Bivariate pooled OLS: democratic stability on tertiary enrolment.
summary(lm(democratic_stability ~ tertiary_enrolment, data = merged_clean))
##
## Call:
## lm(formula = democratic_stability ~ tertiary_enrolment, data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4278 -2.4384 0.7459 2.2210 4.5559
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.979827 0.343364 11.591 < 2e-16 ***
## tertiary_enrolment 0.046107 0.006211 7.424 7.04e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.737 on 396 degrees of freedom
## Multiple R-squared: 0.1222, Adjusted R-squared: 0.1199
## F-statistic: 55.11 on 1 and 396 DF, p-value: 7.042e-13
# Democratic stability on education quality and primary enrolment jointly.
summary(lm(democratic_stability ~ Tscore1519_ML + primary_enrolment, data = merged_clean))
##
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + primary_enrolment,
## data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0818 -3.1768 0.8757 2.2880 4.2772
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.710911 2.768658 -1.340 0.1809
## Tscore1519_ML 0.010686 0.002416 4.424 1.25e-05 ***
## primary_enrolment 0.052384 0.022281 2.351 0.0192 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.85 on 395 degrees of freedom
## Multiple R-squared: 0.05028, Adjusted R-squared: 0.04547
## F-statistic: 10.46 on 2 and 395 DF, p-value: 3.763e-05
# Same model with the numeric region code entered as a factor, giving one
# intercept shift per region relative to region 1.
summary(lm(democratic_stability ~ Tscore1519_ML + primary_enrolment + factor(region), data = merged_clean))
##
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + primary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0071 -1.0954 0.0300 0.9233 4.0327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.235390 1.746190 0.135 0.89284
## Tscore1519_ML 0.010693 0.001791 5.972 5.29e-09 ***
## primary_enrolment 0.028824 0.014113 2.042 0.04178 *
## factor(region)2 0.188710 0.286105 0.660 0.50991
## factor(region)3 2.214980 0.677807 3.268 0.00118 **
## factor(region)4 -5.682009 0.301286 -18.859 < 2e-16 ***
## factor(region)5 1.434986 0.473849 3.028 0.00262 **
## factor(region)6 -3.803029 0.258883 -14.690 < 2e-16 ***
## factor(region)7 -3.656660 0.307157 -11.905 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.634 on 389 degrees of freedom
## Multiple R-squared: 0.6926, Adjusted R-squared: 0.6863
## F-statistic: 109.6 on 8 and 389 DF, p-value: < 2.2e-16
# Linear model with `status_index` as the outcome; predictors are the test
# score, primary enrolment, and `region` as a categorical control.
fit_status_primary_region <- lm(
  status_index ~ Tscore1519_ML + primary_enrolment + factor(region),
  data = merged_clean
)
summary(fit_status_primary_region)
##
## Call:
## lm(formula = status_index ~ Tscore1519_ML + primary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.01369 -0.51117 0.00717 0.52850 1.89550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2700522 0.8153979 1.558 0.120
## Tscore1519_ML 0.0133611 0.0008362 15.979 < 2e-16 ***
## primary_enrolment 0.0039680 0.0065900 0.602 0.547
## factor(region)2 0.1828563 0.1335990 1.369 0.172
## factor(region)3 1.2793452 0.3165076 4.042 6.39e-05 ***
## factor(region)4 -2.1396528 0.1406878 -15.209 < 2e-16 ***
## factor(region)5 1.1311970 0.2212679 5.112 5.00e-07 ***
## factor(region)6 -2.3378808 0.1208876 -19.339 < 2e-16 ***
## factor(region)7 -1.4549776 0.1434295 -10.144 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.763 on 389 degrees of freedom
## Multiple R-squared: 0.7598, Adjusted R-squared: 0.7549
## F-statistic: 153.8 on 8 and 389 DF, p-value: < 2.2e-16
# Linear model with `economy_status` as the outcome; predictors are the test
# score, primary enrolment, and `region` as a categorical control.
fit_economy_primary_region <- lm(
  economy_status ~ Tscore1519_ML + primary_enrolment + factor(region),
  data = merged_clean
)
summary(fit_economy_primary_region)
##
## Call:
## lm(formula = economy_status ~ Tscore1519_ML + primary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.62058 -0.49463 0.03983 0.40384 2.65743
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.8137622 0.7035872 1.157 0.24815
## Tscore1519_ML 0.0156699 0.0007215 21.719 < 2e-16 ***
## primary_enrolment -0.0044118 0.0056863 -0.776 0.43830
## factor(region)2 0.2531379 0.1152793 2.196 0.02869 *
## factor(region)3 0.8545999 0.2731068 3.129 0.00189 **
## factor(region)4 -0.5177732 0.1213961 -4.265 2.51e-05 ***
## factor(region)5 1.1103272 0.1909267 5.815 1.26e-08 ***
## factor(region)6 -1.8653228 0.1043110 -17.882 < 2e-16 ***
## factor(region)7 -0.5570562 0.1237618 -4.501 8.95e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6583 on 389 degrees of freedom
## Multiple R-squared: 0.7493, Adjusted R-squared: 0.7442
## F-statistic: 145.3 on 8 and 389 DF, p-value: < 2.2e-16
# Print the column names of merged_clean to check which outcome and predictor
# variables are available.
names(merged_clean)
## [1] "country_name" "region"
## [3] "year" "status_index"
## [5] "democracy_status" "economy_status"
## [7] "governance_index" "governance_perf"
## [9] "stateness" "political_participation"
## [11] "rule_of_law" "democratic_stability"
## [13] "political_integration" "civil_society"
## [15] "conflict_intensity" "steering_capability"
## [17] "resource_efficiency" "corruption_policy"
## [19] "consensus_building" "conflict_management"
## [21] "international_cooperation" "credibility"
## [23] "primary_enrolment" "secondary_enrolment"
## [25] "tertiary_enrolment" "Tscore1519_ML"
# Linear model with `democracy_status` as the outcome; predictors are the test
# score, primary enrolment, and `region` as a categorical control.
fit_democracy_primary_region <- lm(
  democracy_status ~ Tscore1519_ML + primary_enrolment + factor(region),
  data = merged_clean
)
summary(fit_democracy_primary_region)
##
## Call:
## lm(formula = democracy_status ~ Tscore1519_ML + primary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.65966 -0.65361 0.03519 0.64271 2.48232
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.726342 1.126523 1.532 0.126224
## Tscore1519_ML 0.011052 0.001155 9.568 < 2e-16 ***
## primary_enrolment 0.012348 0.009104 1.356 0.175808
## factor(region)2 0.112575 0.184575 0.610 0.542276
## factor(region)3 1.704091 0.437275 3.897 0.000115 ***
## factor(region)4 -3.761532 0.194369 -19.353 < 2e-16 ***
## factor(region)5 1.152067 0.305695 3.769 0.000190 ***
## factor(region)6 -2.810439 0.167014 -16.828 < 2e-16 ***
## factor(region)7 -2.352899 0.198157 -11.874 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.054 on 389 degrees of freedom
## Multiple R-squared: 0.7322, Adjusted R-squared: 0.7266
## F-statistic: 132.9 on 8 and 389 DF, p-value: < 2.2e-16
# Linear model with `governance_index` as the outcome; predictors are the test
# score, primary enrolment, and `region` as a categorical control.
fit_governance_primary_region <- lm(
  governance_index ~ Tscore1519_ML + primary_enrolment + factor(region),
  data = merged_clean
)
summary(fit_governance_primary_region)
##
## Call:
## lm(formula = governance_index ~ Tscore1519_ML + primary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7747 -0.4593 0.1143 0.5311 2.1312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.055674 0.880603 -0.063 0.949622
## Tscore1519_ML 0.009894 0.000903 10.957 < 2e-16 ***
## primary_enrolment 0.014897 0.007117 2.093 0.036977 *
## factor(region)2 0.546552 0.144283 3.788 0.000176 ***
## factor(region)3 2.055019 0.341818 6.012 4.22e-09 ***
## factor(region)4 -0.988283 0.151938 -6.505 2.40e-10 ***
## factor(region)5 1.577307 0.238962 6.601 1.34e-10 ***
## factor(region)6 -1.463735 0.130555 -11.212 < 2e-16 ***
## factor(region)7 -0.667662 0.154899 -4.310 2.07e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.824 on 389 degrees of freedom
## Multiple R-squared: 0.553, Adjusted R-squared: 0.5438
## F-statistic: 60.15 on 8 and 389 DF, p-value: < 2.2e-16
# Democratic stability model using secondary (rather than primary) enrolment
# alongside the test score, with `region` as a categorical control.
fit_stability_secondary_region <- lm(
  democratic_stability ~ Tscore1519_ML + secondary_enrolment + factor(region),
  data = merged_clean
)
summary(fit_stability_secondary_region)
##
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + secondary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8902 -1.0917 0.0054 0.9926 3.9372
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.971499 0.899089 3.305 0.00104 **
## Tscore1519_ML 0.008362 0.002084 4.012 7.22e-05 ***
## secondary_enrolment 0.012732 0.007362 1.729 0.08455 .
## factor(region)2 0.341917 0.270992 1.262 0.20781
## factor(region)3 2.252358 0.681742 3.304 0.00104 **
## factor(region)4 -5.661398 0.301149 -18.799 < 2e-16 ***
## factor(region)5 1.303553 0.482078 2.704 0.00715 **
## factor(region)6 -3.854141 0.260095 -14.818 < 2e-16 ***
## factor(region)7 -3.501848 0.310148 -11.291 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.636 on 389 degrees of freedom
## Multiple R-squared: 0.6917, Adjusted R-squared: 0.6853
## F-statistic: 109.1 on 8 and 389 DF, p-value: < 2.2e-16
# List the distinct values of `country_name` in merged_clean, in order of
# first appearance.
unique(merged_clean[["country_name"]])
## [1] "Albania" "Algeria" "Argentina"
## [4] "Armenia" "Bahrain" "Botswana"
## [7] "Bulgaria" "Cambodia" "Chile"
## [10] "Colombia" "Croatia" "Czechia"
## [13] "El Salvador" "Estonia" "Georgia"
## [16] "Ghana" "Hungary" "Indonesia"
## [19] "Jordan" "Kazakhstan" "Latvia"
## [22] "Lithuania" "Malaysia" "Mauritius"
## [25] "Mexico" "Mongolia" "Morocco"
## [28] "Panama" "Peru" "Philippines"
## [31] "Poland" "Romania" "Serbia"
## [34] "Slovenia" "Thailand" "Tunisia"
## [37] "Ukraine" "Uruguay" "Uzbekistan"
## [40] "Belarus" "Bosnia and Herzegovina" "Jamaica"
## [43] "Montenegro" "North Macedonia" "Oman"
## [46] "Paraguay" "Azerbaijan" "Dominican Republic"
## [49] "Guatemala" "Qatar" "South Africa"
## [52] "Brazil" "Costa Rica" "Kuwait"
## [55] "Moldova" "Saudi Arabia" "Singapore"
## [58] "United Arab Emirates"
# Democratic stability model using tertiary enrolment alongside the test
# score, with `region` as a categorical control.
fit_stability_tertiary_region <- lm(
  democratic_stability ~ Tscore1519_ML + tertiary_enrolment + factor(region),
  data = merged_clean
)
summary(fit_stability_tertiary_region)
##
## Call:
## lm(formula = democratic_stability ~ Tscore1519_ML + tertiary_enrolment +
## factor(region), data = merged_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1896 -1.0340 0.0491 1.0067 4.3256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.307482 0.909403 4.737 3.05e-06 ***
## Tscore1519_ML 0.005674 0.002183 2.599 0.009693 **
## tertiary_enrolment 0.019204 0.005441 3.530 0.000466 ***
## factor(region)2 0.269926 0.268623 1.005 0.315592
## factor(region)3 2.262495 0.670302 3.375 0.000811 ***
## factor(region)4 -5.410262 0.302296 -17.897 < 2e-16 ***
## factor(region)5 1.617775 0.471250 3.433 0.000661 ***
## factor(region)6 -3.772812 0.256413 -14.714 < 2e-16 ***
## factor(region)7 -3.371352 0.308569 -10.926 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.617 on 389 degrees of freedom
## Multiple R-squared: 0.6989, Adjusted R-squared: 0.6927
## F-statistic: 112.9 on 8 and 389 DF, p-value: < 2.2e-16
# Scatter plot of democratic stability against test score, one panel per
# value of `year`. Each panel gets a red straight-line lm fit with the
# confidence band switched off (se = FALSE).
stability_by_year_plot <- ggplot(
  data = merged_clean,
  mapping = aes(x = Tscore1519_ML, y = democratic_stability)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  facet_wrap(vars(year)) +
  labs(
    title = "Democratic Stability vs Test Scores by Year",
    x = "Test Score",
    y = "Democratic Stability"
  ) +
  theme_minimal()
# Auto-print at top level so the figure is rendered.
stability_by_year_plot
## `geom_smooth()` using formula = 'y ~ x'
# Scatter plot of democratic stability against test score, one panel per
# value of `region`. Each panel gets a red straight-line lm fit with the
# confidence band switched off (se = FALSE).
stability_by_region_plot <- ggplot(
  data = merged_clean,
  mapping = aes(x = Tscore1519_ML, y = democratic_stability)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  facet_wrap(vars(region)) +
  labs(
    title = "Democratic Stability vs Test Scores by Region",
    x = "Test Score",
    y = "Democratic Stability"
  ) +
  theme_minimal()
# Auto-print at top level so the figure is rendered.
stability_by_region_plot
## `geom_smooth()` using formula = 'y ~ x'