library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(statsr)
## Loading required package: BayesFactor
## Loading required package: coda
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## ************
## Welcome to BayesFactor 0.9.12-4.7. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
##
## Attaching package: 'statsr'
##
## The following objects are masked from 'package:openintro':
##
## calc_streak, evals, nycflights, present
library(broom)
# Show the first rows of the Human Freedom Index tibble.
print(hfi)
## # A tibble: 1,458 × 123
## year ISO_code countries region pf_rol_procedural pf_rol_civil
## <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2016 ALB Albania Eastern Europe 6.66 4.55
## 2 2016 DZA Algeria Middle East & North… NA NA
## 3 2016 AGO Angola Sub-Saharan Africa NA NA
## 4 2016 ARG Argentina Latin America & the… 7.10 5.79
## 5 2016 ARM Armenia Caucasus & Central … NA NA
## 6 2016 AUS Australia Oceania 8.44 7.53
## 7 2016 AUT Austria Western Europe 8.97 7.87
## 8 2016 AZE Azerbaijan Caucasus & Central … NA NA
## 9 2016 BHS Bahamas Latin America & the… 6.93 6.01
## 10 2016 BHR Bahrain Middle East & North… NA NA
## # ℹ 1,448 more rows
## # ℹ 117 more variables: pf_rol_criminal <dbl>, pf_rol <dbl>,
## # pf_ss_homicide <dbl>, pf_ss_disappearances_disap <dbl>,
## # pf_ss_disappearances_violent <dbl>, pf_ss_disappearances_organized <dbl>,
## # pf_ss_disappearances_fatalities <dbl>, pf_ss_disappearances_injuries <dbl>,
## # pf_ss_disappearances <dbl>, pf_ss_women_fgm <dbl>,
## # pf_ss_women_missing <dbl>, pf_ss_women_inheritance_widows <dbl>, …
# Dimensions of the data set: rows, then columns.
hfi %>% dim()
## [1] 1458 123
# Column-wise overview: one line per variable with its type and first values.
hfi %>% glimpse()
## Rows: 1,458
## Columns: 123
## $ year <dbl> 2016, 2016, 2016, 2016, 2016, 2016,…
## $ ISO_code <chr> "ALB", "DZA", "AGO", "ARG", "ARM", …
## $ countries <chr> "Albania", "Algeria", "Angola", "Ar…
## $ region <chr> "Eastern Europe", "Middle East & No…
## $ pf_rol_procedural <dbl> 6.661503, NA, NA, 7.098483, NA, 8.4…
## $ pf_rol_civil <dbl> 4.547244, NA, NA, 5.791960, NA, 7.5…
## $ pf_rol_criminal <dbl> 4.666508, NA, NA, 4.343930, NA, 7.3…
## $ pf_rol <dbl> 5.291752, 3.819566, 3.451814, 5.744…
## $ pf_ss_homicide <dbl> 8.920429, 9.456254, 8.060260, 7.622…
## $ pf_ss_disappearances_disap <dbl> 10, 10, 5, 10, 10, 10, 10, 10, 10, …
## $ pf_ss_disappearances_violent <dbl> 10.000000, 9.294030, 10.000000, 10.…
## $ pf_ss_disappearances_organized <dbl> 10.0, 5.0, 7.5, 7.5, 7.5, 10.0, 10.…
## $ pf_ss_disappearances_fatalities <dbl> 10.000000, 9.926119, 10.000000, 10.…
## $ pf_ss_disappearances_injuries <dbl> 10.000000, 9.990149, 10.000000, 9.9…
## $ pf_ss_disappearances <dbl> 10.000000, 8.842060, 8.500000, 9.49…
## $ pf_ss_women_fgm <dbl> 10.0, 10.0, 10.0, 10.0, 10.0, 10.0,…
## $ pf_ss_women_missing <dbl> 7.5, 7.5, 10.0, 10.0, 5.0, 10.0, 10…
## $ pf_ss_women_inheritance_widows <dbl> 5, 0, 5, 10, 10, 10, 10, 5, NA, 0, …
## $ pf_ss_women_inheritance_daughters <dbl> 5, 0, 5, 10, 10, 10, 10, 10, NA, 0,…
## $ pf_ss_women_inheritance <dbl> 5.0, 0.0, 5.0, 10.0, 10.0, 10.0, 10…
## $ pf_ss_women <dbl> 7.500000, 5.833333, 8.333333, 10.00…
## $ pf_ss <dbl> 8.806810, 8.043882, 8.297865, 9.040…
## $ pf_movement_domestic <dbl> 5, 5, 0, 10, 5, 10, 10, 5, 10, 10, …
## $ pf_movement_foreign <dbl> 10, 5, 5, 10, 5, 10, 10, 5, 10, 5, …
## $ pf_movement_women <dbl> 5, 5, 10, 10, 10, 10, 10, 5, NA, 5,…
## $ pf_movement <dbl> 6.666667, 5.000000, 5.000000, 10.00…
## $ pf_religion_estop_establish <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_religion_estop_operate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_religion_estop <dbl> 10.0, 5.0, 10.0, 7.5, 5.0, 10.0, 10…
## $ pf_religion_harassment <dbl> 9.566667, 6.873333, 8.904444, 9.037…
## $ pf_religion_restrictions <dbl> 8.011111, 2.961111, 7.455556, 6.850…
## $ pf_religion <dbl> 9.192593, 4.944815, 8.786667, 7.795…
## $ pf_association_association <dbl> 10.0, 5.0, 2.5, 7.5, 7.5, 10.0, 10.…
## $ pf_association_assembly <dbl> 10.0, 5.0, 2.5, 10.0, 7.5, 10.0, 10…
## $ pf_association_political_establish <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_association_political_operate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_association_political <dbl> 10.0, 5.0, 2.5, 5.0, 5.0, 10.0, 10.…
## $ pf_association_prof_establish <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_association_prof_operate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_association_prof <dbl> 10.0, 5.0, 5.0, 7.5, 5.0, 10.0, 10.…
## $ pf_association_sport_establish <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_association_sport_operate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ pf_association_sport <dbl> 10.0, 5.0, 7.5, 7.5, 7.5, 10.0, 10.…
## $ pf_association <dbl> 10.0, 5.0, 4.0, 7.5, 6.5, 10.0, 10.…
## $ pf_expression_killed <dbl> 10.000000, 10.000000, 10.000000, 10…
## $ pf_expression_jailed <dbl> 10.000000, 10.000000, 10.000000, 10…
## $ pf_expression_influence <dbl> 5.0000000, 2.6666667, 2.6666667, 5.…
## $ pf_expression_control <dbl> 5.25, 4.00, 2.50, 5.50, 4.25, 7.75,…
## $ pf_expression_cable <dbl> 10.0, 10.0, 7.5, 10.0, 7.5, 10.0, 1…
## $ pf_expression_newspapers <dbl> 10.0, 7.5, 5.0, 10.0, 7.5, 10.0, 10…
## $ pf_expression_internet <dbl> 10.0, 7.5, 7.5, 10.0, 7.5, 10.0, 10…
## $ pf_expression <dbl> 8.607143, 7.380952, 6.452381, 8.738…
## $ pf_identity_legal <dbl> 0, NA, 10, 10, 7, 7, 10, 0, NA, NA,…
## $ pf_identity_parental_marriage <dbl> 10, 0, 10, 10, 10, 10, 10, 10, 10, …
## $ pf_identity_parental_divorce <dbl> 10, 5, 10, 10, 10, 10, 10, 10, 10, …
## $ pf_identity_parental <dbl> 10.0, 2.5, 10.0, 10.0, 10.0, 10.0, …
## $ pf_identity_sex_male <dbl> 10, 0, 0, 10, 10, 10, 10, 10, 10, 1…
## $ pf_identity_sex_female <dbl> 10, 0, 0, 10, 10, 10, 10, 10, 10, 1…
## $ pf_identity_sex <dbl> 10, 0, 0, 10, 10, 10, 10, 10, 10, 1…
## $ pf_identity_divorce <dbl> 5, 0, 10, 10, 5, 10, 10, 5, NA, 0, …
## $ pf_identity <dbl> 6.2500000, 0.8333333, 7.5000000, 10…
## $ pf_score <dbl> 7.596281, 5.281772, 6.111324, 8.099…
## $ pf_rank <dbl> 57, 147, 117, 42, 84, 11, 8, 131, 6…
## $ ef_government_consumption <dbl> 8.232353, 2.150000, 7.600000, 5.335…
## $ ef_government_transfers <dbl> 7.509902, 7.817129, 8.886739, 6.048…
## $ ef_government_enterprises <dbl> 8, 0, 0, 6, 8, 10, 10, 0, 7, 10, 7,…
## $ ef_government_tax_income <dbl> 9, 7, 10, 7, 5, 5, 4, 9, 10, 10, 8,…
## $ ef_government_tax_payroll <dbl> 7, 2, 9, 1, 5, 5, 3, 4, 10, 10, 8, …
## $ ef_government_tax <dbl> 8.0, 4.5, 9.5, 4.0, 5.0, 5.0, 3.5, …
## $ ef_government <dbl> 7.935564, 3.616782, 6.496685, 5.346…
## $ ef_legal_judicial <dbl> 2.6682218, 4.1867042, 1.8431292, 3.…
## $ ef_legal_courts <dbl> 3.145462, 4.327113, 1.974566, 2.930…
## $ ef_legal_protection <dbl> 4.512228, 4.689952, 2.512364, 4.255…
## $ ef_legal_military <dbl> 8.333333, 4.166667, 3.333333, 7.500…
## $ ef_legal_integrity <dbl> 4.166667, 5.000000, 4.166667, 3.333…
## $ ef_legal_enforcement <dbl> 4.3874441, 4.5075380, 2.3022004, 3.…
## $ ef_legal_restrictions <dbl> 6.485287, 6.626692, 5.455882, 6.857…
## $ ef_legal_police <dbl> 6.933500, 6.136845, 3.016104, 3.385…
## $ ef_legal_crime <dbl> 6.215401, 6.737383, 4.291197, 4.133…
## $ ef_legal_gender <dbl> 0.9487179, 0.8205128, 0.8461538, 0.…
## $ ef_legal <dbl> 5.071814, 4.690743, 2.963635, 3.904…
## $ ef_money_growth <dbl> 8.986454, 6.955962, 9.385679, 5.233…
## $ ef_money_sd <dbl> 9.484575, 8.339152, 4.986742, 5.224…
## $ ef_money_inflation <dbl> 9.743600, 8.720460, 3.054000, 2.000…
## $ ef_money_currency <dbl> 10, 5, 5, 10, 10, 10, 10, 5, 0, 10,…
## $ ef_money <dbl> 9.553657, 7.253894, 5.606605, 5.614…
## $ ef_trade_tariffs_revenue <dbl> 9.626667, 8.480000, 8.993333, 6.060…
## $ ef_trade_tariffs_mean <dbl> 9.24, 6.22, 7.72, 7.26, 8.76, 9.50,…
## $ ef_trade_tariffs_sd <dbl> 8.0240, 5.9176, 4.2544, 5.9448, 8.0…
## $ ef_trade_tariffs <dbl> 8.963556, 6.872533, 6.989244, 6.421…
## $ ef_trade_regulatory_nontariff <dbl> 5.574481, 4.962589, 3.132738, 4.466…
## $ ef_trade_regulatory_compliance <dbl> 9.4053278, 0.0000000, 0.9171598, 5.…
## $ ef_trade_regulatory <dbl> 7.489905, 2.481294, 2.024949, 4.811…
## $ ef_trade_black <dbl> 10.00000, 5.56391, 10.00000, 0.0000…
## $ ef_trade_movement_foreign <dbl> 6.306106, 3.664829, 2.946919, 5.358…
## $ ef_trade_movement_capital <dbl> 4.6153846, 0.0000000, 3.0769231, 0.…
## $ ef_trade_movement_visit <dbl> 8.2969231, 1.1062564, 0.1106256, 7.…
## $ ef_trade_movement <dbl> 6.406138, 1.590362, 2.044823, 4.697…
## $ ef_trade <dbl> 8.214900, 4.127025, 5.264754, 3.982…
## $ ef_regulation_credit_ownership <dbl> 5, 0, 8, 5, 10, 10, 8, 5, 10, 10, 5…
## $ ef_regulation_credit_private <dbl> 7.295687, 5.301526, 9.194715, 4.259…
## $ ef_regulation_credit_interest <dbl> 9, 10, 4, 7, 10, 10, 10, 9, 10, 10,…
## $ ef_regulation_credit <dbl> 7.098562, 5.100509, 7.064905, 5.419…
## $ ef_regulation_labor_minwage <dbl> 5.566667, 5.566667, 8.900000, 2.766…
## $ ef_regulation_labor_firing <dbl> 5.396399, 3.896912, 2.656198, 2.191…
## $ ef_regulation_labor_bargain <dbl> 6.234861, 5.958321, 5.172987, 3.432…
## $ ef_regulation_labor_hours <dbl> 8, 6, 4, 10, 10, 10, 6, 6, 8, 8, 10…
## $ ef_regulation_labor_dismissal <dbl> 6.299741, 7.755176, 6.632764, 2.517…
## $ ef_regulation_labor_conscription <dbl> 10, 1, 0, 10, 0, 10, 3, 1, 10, 10, …
## $ ef_regulation_labor <dbl> 6.916278, 5.029513, 4.560325, 5.151…
## $ ef_regulation_business_adm <dbl> 6.072172, 3.722341, 2.758428, 2.404…
## $ ef_regulation_business_bureaucracy <dbl> 6.000000, 1.777778, 1.333333, 6.666…
## $ ef_regulation_business_start <dbl> 9.713864, 9.243070, 8.664627, 9.122…
## $ ef_regulation_business_bribes <dbl> 4.050196, 3.765515, 1.945540, 3.260…
## $ ef_regulation_business_licensing <dbl> 7.324582, 8.523503, 8.096776, 5.253…
## $ ef_regulation_business_compliance <dbl> 7.074366, 7.029528, 6.782923, 6.508…
## $ ef_regulation_business <dbl> 6.705863, 5.676956, 4.930271, 5.535…
## $ ef_regulation <dbl> 6.906901, 5.268992, 5.518500, 5.369…
## $ ef_score <dbl> 7.54, 4.99, 5.17, 4.84, 7.57, 7.98,…
## $ ef_rank <dbl> 34, 159, 155, 160, 29, 10, 27, 106,…
## $ hf_score <dbl> 7.568140, 5.135886, 5.640662, 6.469…
## $ hf_rank <dbl> 48, 155, 142, 107, 57, 4, 16, 130, …
## $ hf_quartile <dbl> 2, 4, 4, 3, 2, 1, 1, 4, 2, 2, 4, 2,…
# Restrict the data to the 2016 observations and keep only the identifier
# columns plus the three scores used in the rest of the analysis.
hfi_2016 <- select(
  filter(hfi, year == 2016),
  ISO_code, countries, region, pf_score, pf_expression_control, hf_score
)
# Scatterplot of personal freedom score against pf_expression_control for 2016.
# The x-axis label now names the plotted variable and agrees with the title
# (the previous label, "Political Control", did not match "Media Control").
ggplot(data = hfi_2016, aes(x = pf_expression_control, y = pf_score)) +
  geom_point() +
  labs(
    title = "Personal Freedom vs. Media Control (2016)",
    x = "Political Pressures and Controls on Media Content",
    y = "Personal Freedom Score"
  )
# Correlation coefficient between the explanatory and response variables.
# The expression is left unnamed so the result column keeps its default name.
summarise(hfi_2016, cor(pf_expression_control, pf_score))
## # A tibble: 1 × 1
## `cor(pf_expression_control, pf_score)`
## <dbl>
## 1 0.845
The overall form of the relationship appears to be linear, and the direction is positive: as the media control score (pf_expression_control) increases, the personal freedom score also increases. The relationship is strong (r = 0.845), with the points clustered tightly around a linear path. Note, however, that toward the lower-left of the graph (near (0, 0)) the observations are more scattered, and the pattern becomes more clearly linear at higher values.
# Interactive sum-of-squares plot: asks the user to click two points to define
# a line, then reports that line's coefficients and its sum of squares.
plot_ss(
  data = hfi_2016,
  x = pf_expression_control,
  y = pf_score,
  showSquares = TRUE
)
## Click two points to make a line.
## Call:
## lm(formula = y ~ x, data = pts)
##
## Coefficients:
## (Intercept) x
## 4.2838 0.5418
##
## Sum of Squares: 102.213
# Least-squares fit of personal freedom score on pf_expression_control,
# followed by the coefficient table (estimate, std. error, statistic, p-value).
m1 <- lm(formula = pf_score ~ pf_expression_control, data = hfi_2016)
m1 %>% tidy()
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 4.28 0.149 28.8 4.23e-65
## 2 pf_expression_control 0.542 0.0271 20.0 2.31e-45
# One-row model summary for m1 (R-squared, sigma, AIC, ...).
m1 %>% glance()
## # A tibble: 1 × 12
## r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.714 0.712 0.799 400. 2.31e-45 1 -193. 391. 400.
## # ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
# Same explanatory variable as m1, but with the overall human freedom score
# (hf_score) as the response; the coefficient table is printed afterwards.
m_hf <- lm(
  formula = hf_score ~ pf_expression_control,
  data = hfi_2016
)
m_hf %>% tidy()
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 5.05 0.123 41.1 5.97e-87
## 2 pf_expression_control 0.368 0.0224 16.5 2.73e-36
Fitted regression line: predicted hf_score = 5.05 + 0.368 * pf_expression_control
# Scatterplot of pf_score against pf_expression_control with the least-squares
# line overlaid; se = FALSE leaves out the confidence band around the line.
hfi_2016 %>%
  ggplot(aes(x = pf_expression_control, y = pf_score)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
Prediction: hf_score = 5.05 + 0.368 * 3 = 6.154
Residual: first look up the observed value for the country whose pf_expression_control is 3.
# Look up the observed pf_score for the country (or countries) whose
# pf_expression_control equals 3. near() compares doubles with a small
# tolerance instead of exact ==, so a floating-point representation
# difference cannot silently drop the row.
hfi_2016 %>%
  filter(near(pf_expression_control, 3)) %>%
  select(countries, pf_score)
## # A tibble: 1 × 2
## countries pf_score
## <chr> <dbl>
## 1 Central Afr. Rep. 5.47
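Residual = observed - predicted. The observed value above is a pf_score, so it should be compared with the prediction from the pf_score model (m1): predicted pf_score = 4.28 + 0.542 * 3 = 5.906, which gives a residual of 5.470 - 5.906 = -0.436. (Subtracting the hf_score prediction of 6.154 instead gives 5.470 - 6.154 = -0.684, but that mixes two different response variables.)
Since the residual is negative, the model has overestimated the value.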
# Attach fitted values and residuals from m1 to the model data, then draw the
# residuals-vs-fitted plot with a dashed red reference line at zero.
m1_aug <- m1 %>% augment()
m1_aug %>%
  ggplot(aes(x = .fitted, y = .resid)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(x = "Fitted values", y = "Residuals")
Looking at the residuals vs. fitted plot, we can see there is no apparent pattern,
which indicates that the linearity condition is met.
# Histogram of the m1 residuals (bins 0.25 wide) for checking their distribution.
m1_aug %>%
  ggplot(aes(x = .resid)) +
  geom_histogram(binwidth = 0.25) +
  labs(x = "Residuals")
The histogram of the residuals is roughly bell-shaped, which means the nearly normal
residuals condition is not violated.
The residuals vs. fitted plot shows roughly similar variance across the range of fitted values. The vertical spread is consistent, which means the constant variability condition is met.