# Print the version of R this analysis is being run with.
getRversion()
## [1] '4.0.3'
Importing libraries and reading data
# EDA ----
# Preview the first rows of df_all (one row per country and year).
# NOTE(review): the first column prints as `ï..country_name`, which looks like
# a UTF-8 byte-order mark absorbed into the header when the CSV was read. The
# import step is not visible here -- confirm, and consider reading the file
# with fileEncoding = "UTF-8-BOM" so the column is simply `country_name`.
head(df_all)
## ï..country_name year life_ladder log_GDP_per_capita social_support
## 1 Afghanistan 2008 3.724 7.370 0.451
## 2 Afghanistan 2009 4.402 7.540 0.552
## 3 Afghanistan 2010 4.758 7.647 0.539
## 4 Afghanistan 2011 3.832 7.620 0.521
## 5 Afghanistan 2012 3.783 7.705 0.521
## 6 Afghanistan 2013 3.572 7.725 0.484
## healthy_life_expectancy_at_birth freedom_to_make_life_choices generosity
## 1 50.80 0.718 0.168
## 2 51.20 0.679 0.190
## 3 51.60 0.600 0.121
## 4 51.92 0.496 0.162
## 5 52.24 0.531 0.236
## 6 52.56 0.578 0.061
## perceptions_of_corruption positive_affect negative_affect
## 1 0.882 0.518 0.258
## 2 0.850 0.584 0.237
## 3 0.707 0.618 0.275
## 4 0.731 0.611 0.267
## 5 0.776 0.710 0.268
## 6 0.823 0.621 0.273
# Preview the first rows of df_2021 (country, region, ladder score and the
# indicator / "explained_by_*" columns printed below).
head(df_2021)
## ï..country_name regional_indicator ladder_score
## 1 Finland Western Europe 7.842
## 2 Denmark Western Europe 7.620
## 3 Switzerland Western Europe 7.571
## 4 Iceland Western Europe 7.554
## 5 Netherlands Western Europe 7.464
## 6 Norway Western Europe 7.392
## standard_error_of_ladder_score upperwhisker lowerwhisker
## 1 0.032 7.904 7.780
## 2 0.035 7.687 7.552
## 3 0.036 7.643 7.500
## 4 0.059 7.670 7.438
## 5 0.027 7.518 7.410
## 6 0.035 7.462 7.323
## logged_GDP_per_capita social_support healthy_life_expectancy
## 1 10.775 0.954 72.0
## 2 10.933 0.954 72.7
## 3 11.117 0.942 74.4
## 4 10.878 0.983 73.0
## 5 10.932 0.942 72.4
## 6 11.053 0.954 73.3
## freedom_to_make_life_choices generosity perceptions_of_corruption
## 1 0.949 -0.098 0.186
## 2 0.946 0.030 0.179
## 3 0.919 0.025 0.292
## 4 0.955 0.160 0.673
## 5 0.913 0.175 0.338
## 6 0.960 0.093 0.270
## ladder_score_in_dystopia explained_by_Log_GDP_per_capita
## 1 2.43 1.446
## 2 2.43 1.502
## 3 2.43 1.566
## 4 2.43 1.482
## 5 2.43 1.501
## 6 2.43 1.543
## explained_by_social_support explained_by_healthy_life_expectancy
## 1 1.106 0.741
## 2 1.108 0.763
## 3 1.079 0.816
## 4 1.172 0.772
## 5 1.079 0.753
## 6 1.108 0.782
## explained_by_freedom_to_make_life_choices explained_by_generosity
## 1 0.691 0.124
## 2 0.686 0.208
## 3 0.653 0.204
## 4 0.698 0.293
## 5 0.647 0.302
## 6 0.703 0.249
## explained_by_perceptions_of_corruption dystopia_residual
## 1 0.481 3.253
## 2 0.485 2.868
## 3 0.413 2.839
## 4 0.170 2.967
## 5 0.384 2.798
## 6 0.427 2.580
# Univariate analysis ----
# Draw the vis_dat() overview for each data set.
df_2021 %>% vis_dat()
df_all %>% vis_dat()

# Checking missing values ----
# gg_miss_var() plot for each data set, with a custom axis title.
gg_miss_var(df_2021) + ylab("Checking for the missing ones")
gg_miss_var(df_all) + ylab("Checking for the missing ones")
# Checking summary statistics ----
# Per-column summary of the 2021 data; the output below lists no NA counts.
summary(df_2021)
## ï..country_name regional_indicator ladder_score
## Length:149 Length:149 Min. :2.523
## Class :character Class :character 1st Qu.:4.852
## Mode :character Mode :character Median :5.534
## Mean :5.533
## 3rd Qu.:6.255
## Max. :7.842
## standard_error_of_ladder_score upperwhisker lowerwhisker
## Min. :0.02600 Min. :2.596 Min. :2.449
## 1st Qu.:0.04300 1st Qu.:4.991 1st Qu.:4.706
## Median :0.05400 Median :5.625 Median :5.413
## Mean :0.05875 Mean :5.648 Mean :5.418
## 3rd Qu.:0.07000 3rd Qu.:6.344 3rd Qu.:6.128
## Max. :0.17300 Max. :7.904 Max. :7.780
## logged_GDP_per_capita social_support healthy_life_expectancy
## Min. : 6.635 Min. :0.4630 Min. :48.48
## 1st Qu.: 8.541 1st Qu.:0.7500 1st Qu.:59.80
## Median : 9.569 Median :0.8320 Median :66.60
## Mean : 9.432 Mean :0.8147 Mean :64.99
## 3rd Qu.:10.421 3rd Qu.:0.9050 3rd Qu.:69.60
## Max. :11.647 Max. :0.9830 Max. :76.95
## freedom_to_make_life_choices generosity perceptions_of_corruption
## Min. :0.3820 Min. :-0.28800 Min. :0.0820
## 1st Qu.:0.7180 1st Qu.:-0.12600 1st Qu.:0.6670
## Median :0.8040 Median :-0.03600 Median :0.7810
## Mean :0.7916 Mean :-0.01513 Mean :0.7274
## 3rd Qu.:0.8770 3rd Qu.: 0.07900 3rd Qu.:0.8450
## Max. :0.9700 Max. : 0.54200 Max. :0.9390
## ladder_score_in_dystopia explained_by_Log_GDP_per_capita
## Min. :2.43 Min. :0.0000
## 1st Qu.:2.43 1st Qu.:0.6660
## Median :2.43 Median :1.0250
## Mean :2.43 Mean :0.9772
## 3rd Qu.:2.43 3rd Qu.:1.3230
## Max. :2.43 Max. :1.7510
## explained_by_social_support explained_by_healthy_life_expectancy
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.6470 1st Qu.:0.3570
## Median :0.8320 Median :0.5710
## Mean :0.7933 Mean :0.5202
## 3rd Qu.:0.9960 3rd Qu.:0.6650
## Max. :1.1720 Max. :0.8970
## explained_by_freedom_to_make_life_choices explained_by_generosity
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.4090 1st Qu.:0.105
## Median :0.5140 Median :0.164
## Mean :0.4987 Mean :0.178
## 3rd Qu.:0.6030 3rd Qu.:0.239
## Max. :0.7160 Max. :0.541
## explained_by_perceptions_of_corruption dystopia_residual
## Min. :0.0000 Min. :0.648
## 1st Qu.:0.0600 1st Qu.:2.138
## Median :0.1010 Median :2.509
## Mean :0.1351 Mean :2.430
## 3rd Qu.:0.1740 3rd Qu.:2.794
## Max. :0.5470 Max. :3.482
# Same summary for the multi-year data. The output below reports NA's for
# every indicator column except life_ladder, so aggregates over df_all need
# explicit NA handling.
summary(df_all)
## ï..country_name year life_ladder log_GDP_per_capita
## Length:1949 Min. :2005 Min. :2.375 Min. : 6.635
## Class :character 1st Qu.:2010 1st Qu.:4.640 1st Qu.: 8.464
## Mode :character Median :2013 Median :5.386 Median : 9.460
## Mean :2013 Mean :5.467 Mean : 9.368
## 3rd Qu.:2017 3rd Qu.:6.283 3rd Qu.:10.353
## Max. :2020 Max. :8.019 Max. :11.648
## NA's :36
## social_support healthy_life_expectancy_at_birth freedom_to_make_life_choices
## Min. :0.2900 Min. :32.30 Min. :0.2580
## 1st Qu.:0.7498 1st Qu.:58.69 1st Qu.:0.6470
## Median :0.8355 Median :65.20 Median :0.7630
## Mean :0.8126 Mean :63.36 Mean :0.7426
## 3rd Qu.:0.9050 3rd Qu.:68.59 3rd Qu.:0.8560
## Max. :0.9870 Max. :77.10 Max. :0.9850
## NA's :13 NA's :55 NA's :32
## generosity perceptions_of_corruption positive_affect negative_affect
## Min. :-0.3350 Min. :0.0350 Min. :0.3220 Min. :0.0830
## 1st Qu.:-0.1130 1st Qu.:0.6900 1st Qu.:0.6255 1st Qu.:0.2060
## Median :-0.0255 Median :0.8020 Median :0.7220 Median :0.2580
## Mean : 0.0001 Mean :0.7471 Mean :0.7100 Mean :0.2685
## 3rd Qu.: 0.0910 3rd Qu.:0.8720 3rd Qu.:0.7990 3rd Qu.:0.3200
## Max. : 0.6980 Max. :0.9830 Max. :0.9440 Max. :0.7050
## NA's :89 NA's :110 NA's :22 NA's :16
# Top 10 happiest countries in 2021 ----

# Indicator columns of df_2021 used for the 2021 analysis.
dimensions <- c(
  "ladder_score",
  "logged_GDP_per_capita",
  "social_support",
  "healthy_life_expectancy",
  "freedom_to_make_life_choices",
  "generosity",
  "perceptions_of_corruption"
)

# Map each country to its region.
# The country column is selected by position (it is the first column of
# df_2021) rather than by its imported name: that name carries a mis-decoded
# byte-order mark ("ï..country_name") whose exact spelling depends on the
# platform and on how the CSV was read.
country_region_dict <- df_2021 %>%
  select(country = 1, region = regional_indicator) %>%
  unique()

# Long format: one row per country and dimension.
# perceptions_of_corruption is replaced by its complement,
# absence_of_corruption = 1 - perceptions_of_corruption.
df_2021_long <- df_2021 %>%
  select(country = 1, all_of(dimensions)) %>%
  mutate(absence_of_corruption = 1 - perceptions_of_corruption) %>%
  pivot_longer(
    cols = c(all_of(dimensions), "absence_of_corruption"),
    names_to = "dimension",
    values_to = "score"
  ) %>%
  filter(dimension != "perceptions_of_corruption")

# Min-max scale each dimension to [0, 1] (score_pct), keeping the per-dimension
# minimum and maximum alongside the raw score.
# (The object name keeps its original spelling because later steps refer to it.)
df_2021_tranformed <- df_2021_long %>%
  group_by(dimension) %>%
  mutate(
    min_value = min(score),
    max_value = max(score),
    score_pct = (score - min_value) / (max_value - min_value)
  ) %>%
  ungroup()
# Getting top 10 ----
# Keep the ten highest ladder scores and build a "Country (rank)" label.
# min_rank() is used instead of rank() so that tied scores share an integer
# rank; rank() would average ties and yield labels such as "Country (2.5)".
df_2021_top10 <- df_2021_tranformed %>%
  filter(dimension == "ladder_score") %>%
  slice_max(score, n = 10) %>%
  mutate(
    cat = "top_10",
    country_rank = min_rank(desc(score)),
    country_label = paste0(country, " (", country_rank, ")")
  )
# Plotting top 10 countries ----
# Each country gets a full-length background bar (y = 10) with its actual
# score bar drawn on top; bars are ordered by score and labelled with the
# rounded score.
# NOTE(review): the subtitle says the happiest countries are in Europe --
# confirm this holds for every row of df_2021_top10.
ggplot(df_2021_top10, aes(x = reorder(country_label, score))) +
  geom_chicklet(
    aes(y = 10, fill = 4.9),
    width = 0.5, radius = grid::unit(10, "pt")
  ) +
  geom_chicklet(
    aes(y = score, fill = score),
    width = 0.5, radius = grid::unit(10, "pt")
  ) +
  # The label is mapped inside aes() so it is evaluated against the plot data
  # instead of indexing the global data frame.
  geom_text(
    aes(y = score, label = round(score, 2)),
    nudge_y = 0.4, size = 3
  ) +
  scale_y_continuous(
    expand = c(0, 0.1), position = "right", limits = c(0, 10)
  ) +
  scale_fill_gradient2(
    low = "black", high = "#818aeb", mid = "white", midpoint = 5
  ) +
  coord_flip() +
  labs(
    y = "Best possible life = 10", x = "",
    title = "Top 10 Happiest Countries in 2021",
    subtitle = "Happiest countries in Europe",
    caption = "Source: The World Happiness Report 2021"
  ) +
  theme_ipsum(grid = "") +
  theme(
    plot.title = element_text(size = 15),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10),
    axis.title.x = element_text(size = 10, color = "#555955"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_blank(),
    # Must be lower-case "none"; "None" is not a valid legend position.
    legend.position = "none"
  )
# Getting bottom 10 ----
# Rank all countries from the lowest ladder score upwards, then keep the ten
# lowest. min_rank() is used instead of rank() so that tied scores share an
# integer rank; rank() would average ties and yield labels like "Country (2.5)".
df_2021_bottom10 <- df_2021_tranformed %>%
  filter(dimension == "ladder_score") %>%
  mutate(
    country_rank = min_rank(score),
    country_label = paste0(country, " (", country_rank, ")")
  ) %>%
  slice_min(score, n = 10) %>%
  mutate(cat = "bottom_10")
# Plotting bottom 10 ----
# Each country gets a full-length background bar (y = 10) with its actual
# score bar drawn on top; bars are ordered by score and labelled with the
# rounded score.
ggplot(df_2021_bottom10, aes(x = reorder(country_label, score))) +
  geom_chicklet(
    aes(y = 10, fill = 4.9),
    width = 0.5, radius = grid::unit(10, "pt")
  ) +
  geom_chicklet(
    aes(y = score, fill = score),
    width = 0.5, radius = grid::unit(10, "pt")
  ) +
  # The label is mapped inside aes() so it is evaluated against the plot data
  # instead of indexing the global data frame.
  geom_text(
    aes(y = score, label = round(score, 2)),
    nudge_y = 0.4, size = 3
  ) +
  scale_y_continuous(
    expand = c(0, 0.1), position = "right", limits = c(0, 10)
  ) +
  scale_fill_gradient2(
    low = "#074040", high = "#4cc2c2", mid = "white", midpoint = 5
  ) +
  coord_flip() +
  labs(
    y = "Best possible life = 10", x = "",
    title = "Top 10 Saddest Countries in the World",
    subtitle = "Countries struck by poverty and war",
    caption = "Source: The World Happiness Report 2021"
  ) +
  theme_ipsum(grid = "") +
  theme(
    plot.title = element_text(size = 15),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10),
    axis.title.x = element_text(size = 10, color = "#555955"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_blank(),
    # Must be lower-case "none"; "None" is not a valid legend position.
    legend.position = "none"
  )
# Happiness in 2019 vs 2020 by region (insights w.r.t. Covid-19) ----
# Steps: keep the two years of interest, attach each country's region, keep
# only countries observed in both years, average the ladder score per region
# and year, then order regions by the 2020 - 2019 difference.
df_2019_2020 <- df_all %>%
  filter(year %in% c(2019, 2020)) %>%
  # The country column is taken by position (first column of df_all): its
  # imported name carries a mis-decoded byte-order mark whose spelling depends
  # on the platform and on how the CSV was read.
  rename(country = 1) %>%
  left_join(country_region_dict, by = "country") %>%
  # Countries without an entry in country_region_dict get an NA region after
  # the left join; drop them so they do not form an NA group and an NA factor
  # value further down.
  filter(!is.na(region)) %>%
  select(country, region, year, ladder = life_ladder) %>%
  pivot_wider(
    names_from = "year", names_prefix = "year", values_from = "ladder"
  ) %>%
  filter(!is.na(year2019) & !is.na(year2020)) %>%
  group_by(region) %>%
  summarize(
    happiness_2019 = mean(year2019, na.rm = TRUE),
    happiness_2020 = mean(year2020, na.rm = TRUE)
  ) %>%
  mutate(diff = happiness_2020 - happiness_2019) %>%
  arrange(diff) %>%
  # Freeze the sorted order as factor levels so plots keep it on the axis.
  mutate(region = factor(region, levels = region))
# Plotting happiness levels during Covid-19 ----
# Subsets shared by several layers: regions whose mean score rose, regions
# whose mean score fell, and the row the column headers are anchored to.
# NOTE(review): a region with diff exactly 0 falls in neither subset; the
# headers are pinned to "South Asia", presumably the top row -- confirm.
regions_up <- filter(df_2019_2020, diff > 0)
regions_down <- filter(df_2019_2020, diff < 0)
header_region <- filter(df_2019_2020, region == "South Asia")

ggplot() +
  # One dumbbell per region running from the 2019 mean to the 2020 mean.
  geom_dumbbell(
    data = regions_up,
    aes(y = region, x = happiness_2019, xend = happiness_2020),
    size = 1.5, color = "#7FB185",
    colour_xend = "#7FB185", colour_x = "#7FB185",
    size_x = 2.5, size_xend = 5,
    dot_guide = TRUE, dot_guide_size = 0.5
  ) +
  geom_dumbbell(
    data = regions_down,
    aes(y = region, x = happiness_2019, xend = happiness_2020),
    size = 1.5, color = "#edae52",
    colour_xend = "#edae52", colour_x = "#edae52",
    size_x = 2.5, size_xend = 5,
    dot_guide = TRUE, dot_guide_size = 0.5
  ) +
  scale_y_discrete(
    limits = levels(df_2019_2020$region), expand = c(0.075, 1)
  ) +
  labs(
    x = "", y = NULL,
    title = "Happiness in pre to amidst Covid",
    subtitle = "Regions see increases in happiness, despite Covid",
    caption = "Source: World Happiness Report (2021)"
  ) +
  # Grey band on the right that hosts the DIFF column.
  geom_rect(
    data = df_2019_2020,
    aes(xmin = 7.35, xmax = 7.65, ymin = -Inf, ymax = Inf),
    fill = "#e3e2e1"
  ) +
  # Year headers above the anchor row.
  geom_text(
    data = header_region,
    aes(x = happiness_2020, y = region, label = "2020"),
    color = "gray15", size = 3, vjust = -1.5
  ) +
  geom_text(
    data = header_region,
    aes(x = happiness_2019, y = region, label = "2019"),
    color = "gray15", size = 3, vjust = -1.5
  ) +
  # Value labels for regions that rose: 2020 right of its dot, 2019 left.
  geom_text(
    data = regions_up,
    aes(x = happiness_2020, y = region, label = round(happiness_2020, 2)),
    size = 3, hjust = -0.5
  ) +
  geom_text(
    data = regions_up,
    aes(x = happiness_2019, y = region, label = round(happiness_2019, 2)),
    color = "gray15", size = 3, hjust = 1.3
  ) +
  # Value labels for regions that fell: sides are mirrored.
  geom_text(
    data = regions_down,
    aes(x = happiness_2020, y = region, label = round(happiness_2020, 2)),
    size = 3, hjust = 1.5
  ) +
  geom_text(
    data = regions_down,
    aes(x = happiness_2019, y = region, label = round(happiness_2019, 2)),
    color = "gray15", size = 3, hjust = -0.3
  ) +
  # DIFF column: header above the anchor row, then one value per region.
  geom_text(
    data = header_region,
    aes(x = 7.5, y = region, label = "DIFF"),
    size = 3, vjust = -1.5, fontface = "bold"
  ) +
  geom_text(
    data = df_2019_2020,
    aes(label = round(diff, 2), y = region, x = 7.5),
    size = 3
  ) +
  theme_ipsum(grid = "") +
  theme(
    plot.title = element_text(size = 15),
    plot.subtitle = element_text(size = 12),
    plot.caption = element_text(size = 10),
    axis.title.x = element_text(size = 10, color = "#3a403a"),
    axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_blank(),
    legend.position = "left"
  )
# Correlation matrix ----
# Pick the 2021 indicator columns, giving the long ones shorter display names.
df_cor <- df_2021 %>%
  select(
    corruption = perceptions_of_corruption,
    generosity,
    freedom = freedom_to_make_life_choices,
    life_expectancy = healthy_life_expectancy,
    social_support,
    GDP_per_capita = logged_GDP_per_capita,
    happiness = ladder_score
  )

# Pairwise correlations, computed once and shared by both plots below.
corr_matrix <- cor(df_cor)
corr <- corr_matrix

# Lower-triangle plot showing the coefficients as numbers.
corrplot(corr_matrix, method = "number", type = "lower")

# Interactive heatmap of the same matrix, colour scale fixed to [-1, 1].
plot_ly(colors = "RdBu") %>%
  add_heatmap(x = rownames(corr), y = colnames(corr), z = corr) %>%
  colorbar(limits = c(-1, 1))
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent the R code that generated the plot from being printed.