library(openxlsx)
library(dplyr)
library(treemap)
library(plotly)
# Import the data ----
# All three input files sit in one directory; edit `data_dir` to relocate them.
data_dir <- "/home/user/School/STAT360/Project 1 (Happiness)/Data"

# Happiness workbook. Later code reads its Year, Rank, Country.name and
# `Life.evaluation.(3-year.average)` columns.
happy <- read.xlsx(file.path(data_dir, "WHR25_Data_Figure_2.1v3.xlsx"))

# UN data export used as the GDP-per-capita source. Later code reads its
# Country.or.Area, Year and Value columns.
gdp <- read.csv(file.path(data_dir, "UNdata_Export_20251123_014030794.csv"))

# Restores the saved workspace objects from the .rda file; the continent
# lookup further down expects this to define `GNI2014`.
load(file.path(data_dir, "GNI2014.rda"))
# Create relevant dataframes ----

# Build the happiness table for one survey year.
#
# Takes the first four columns of `happy` (the rest of the script reads Year,
# Rank, Country.name and the life-evaluation score from them), keeps the rows
# for `target_year`, attaches a GDP.per.capita column looked up by exact
# country name in `gdp`, applies manual overrides, and sorts by Rank.
#
# target_year    Year to extract (compared against happy$Year and gdp$Year).
# gdp_overrides  Named numeric vector: names are Country.name values, values
#                are the GDP per capita to force for that country.
#
# Returns a data frame sorted by Rank. Countries with no matching GDP row keep
# the sentinel value 0, which the GDP scatter plot filters out.
build_happy_year <- function(target_year, gdp_overrides = numeric(0)) {
  yearly <- happy[, 1:4] |>
    filter(Year == target_year)

  # which() drops rows whose Year is NA instead of producing all-NA rows.
  gdp_rows <- gdp[which(gdp$Year == target_year), ]

  # One vectorised lookup: NA where the country has no GDP row for this year.
  hit <- match(yearly$Country.name, gdp_rows$Country.or.Area)
  gdp_value <- gdp_rows$Value[hit]
  gdp_value[is.na(hit)] <- 0
  yearly$GDP.per.capita <- gdp_value

  # Manual values for countries the name lookup does not fill in
  # (presumably they are listed under different names in the GDP export --
  # TODO confirm). A country absent from this year's table is skipped.
  for (country in names(gdp_overrides)) {
    is_country <- yearly$Country.name %in% country
    yearly$GDP.per.capita[is_country] <- gdp_overrides[[country]]
  }

  arrange(yearly, Rank)
}

happy2017 <- build_happy_year(
  2017,
  c("Bolivia" = 3306.3066, "China" = 8716.2879, "United Kingdom" = 40392.7182)
)
happy2018 <- build_happy_year(
  2018,
  c("Bolivia" = 3500.7264, "China" = 9791.9798, "United Kingdom" = 43017.6149)
)
happy2019 <- build_happy_year(
  2019,
  c("Bolivia" = 3503.8960, "China" = 10031.4466, "United Kingdom" = 42487.9520)
)
happy2020 <- build_happy_year(
  2020,
  c("Bolivia" = 3099.9422, "China" = 10299.1942, "United Kingdom" = 40040.1466)
)

# STAT 360 Project 1
# 1. What are the top 10 happiest countries in the most recent year available?
# Bar chart of the first ten rows of the 2020 table (which is sorted by Rank).
# Bars are drawn relative to a baseline of 7 so the small gaps between the
# leading countries are visible; the y-axis is then relabelled with the real
# scores.
score_col <- "Life.evaluation.(3-year.average)"
baseline <- 7
top10 <- head(happy2020, 10)
bar_height <- top10[[score_col]] - baseline

# barplot() returns the x midpoint of every bar, reused to place the labels.
barplot2020 <- barplot(
  height = bar_height,
  ylim = c(-0.12, max(bar_height) + 0.1),
  yaxt = "n",
  ylab = "Happiness Index (3-year Average)",
  main = "Happiness Index, 2020",
  col = "skyblue"
)

# Slanted country names just below the bars (xpd lets them leave the plot
# region).
text(
  x = barplot2020,
  y = -0.045,
  labels = top10$Country.name,
  adj = 1,
  srt = 60,
  xpd = TRUE
)

# Score printed above each bar, rounded to two decimals. sprintf() avoids the
# floating-point surprises of trunc(100 * x) / 100 and keeps trailing zeros.
text(
  x = barplot2020,
  y = bar_height + 0.04,
  labels = sprintf("%.2f", top10[[score_col]]),
  cex = 0.8
)

# Tick every 0.1 above the baseline, labelling every second tick with the
# true score ("7.0", "", "7.2", ...).
tick_at <- seq(0, 0.9, 0.1)
tick_labels <- sprintf("%.1f", baseline + tick_at)
tick_labels[c(FALSE, TRUE)] <- ""
axis(
  side = 2,
  at = tick_at,
  labels = tick_labels
)

# In 2020, by a decent margin, Finland was the “happiest” country according to
# happiness index surveys. Next was Denmark, then Switzerland, with Austria
# being the final of the 10 happiest.
# 2. How has the happiness score changed over the 4 years for China, India, USA, Indonesia, Japan, and Russia?
# Grouped bar chart: one group per country, one bar per year (2017-2020).
score_col <- "Life.evaluation.(3-year.average)"
countries <- c(
  "China", "India", "United States", "Indonesia", "Japan",
  "Russian Federation"
)
# Short display names, in the same order as `countries`.
country_labels <- c("China", "India", "USA", "Indonesia", "Japan", "Russia")
year_tables <- list(happy2017, happy2018, happy2019, happy2020)
year_colours <- c("navy", "royalblue", "skyblue", "lightblue")

# 4 x 6 matrix: rows are years, columns are countries. A country missing from
# a year's table yields NA for that cell instead of silently shifting every
# later value, which is what growing the matrix with rbind() would do.
fouryear <- vapply(
  countries,
  function(country) {
    vapply(
      year_tables,
      function(tbl) {
        as.numeric(tbl[[score_col]][match(country, tbl$Country.name)])
      },
      numeric(1)
    )
  },
  numeric(length(year_tables)),
  USE.NAMES = FALSE
)

# With beside = TRUE, barplot() returns a matrix of bar midpoints with the
# same shape as `height`.
barplotcountries <- barplot(
  height = fouryear,
  beside = TRUE,
  xaxt = "n",
  yaxt = "n",
  ylim = c(0, 7),
  ylab = "Happiness Index (3-year Average)",
  main = "Happiness Index, 2017 - 2020",
  col = year_colours
)

# Centre each country label under its group of bars.
axis(
  side = 1,
  at = colMeans(barplotcountries),
  labels = country_labels,
  cex.axis = 1
)
axis(
  side = 2,
  at = seq(0, 7, 1),
  labels = as.character(0:7)
)

# Legend sits above the plot region, hence xpd = TRUE.
legend(
  x = 0,
  y = 7.7,
  legend = 2017:2020,
  fill = year_colours,
  bty = "n",
  xpd = TRUE
)

# In China, the USA, and Japan, over the four years, the happiness index has
# remained roughly the same. In Indonesia, the index has improved noticeably.
# In India and Russia, the index has declined, though it is possibly
# rebounding in India.
# 3. Which countries have consistently ranked among the top 10 happiest countries across all 4 years?
# Countries that appear in the first ten rows of every yearly table (each
# table is sorted by Rank). head() returns fewer names rather than NA if a
# table has under ten rows.
top10_by_year <- lapply(
  list(happy2017, happy2018, happy2019, happy2020),
  function(tbl) head(tbl$Country.name, 10)
)

# intersect() keeps the order of its first argument, so the result follows
# the 2017 ordering.
happiest <- Reduce(intersect, top10_by_year)
happiest
# Printed result recorded with the report:
# [1] "Finland"     "Norway"      "Denmark"     "Iceland"     "Switzerland"
# [6] "Netherlands" "New Zealand" "Sweden"
# 4. Is there a correlation between GDP per capita and happiness score among countries?
# Scatter plot of GDP per capita against happiness score, one marker style
# per year, all on a single set of axes.
score_col <- "Life.evaluation.(3-year.average)"
year_series <- list(
  list(label = "2017", data = happy2017, pch = 16, cex = 0.85,
       col = "darkred"),
  list(label = "2018", data = happy2018, pch = 17, cex = 0.85,
       col = "forestgreen"),
  list(label = "2019", data = happy2019, pch = 15, cex = 0.85,
       col = "#EEB422"),
  list(label = "2020", data = happy2020, pch = 18, cex = 0.9,
       col = "royalblue")
)

# Draw the empty axes once; each year is then added with points() instead of
# re-plotting over the same device with par(new = TRUE).
plot(
  x = 0,
  y = 0,
  type = "n",
  xlim = c(0, 90000),
  ylim = c(2.5, 8),
  xlab = "GDP per Capita ($US)",
  ylab = "Happiness Index (3-year Average)",
  main = "GDP per Capita vs. Happiness Index",
  frame.plot = FALSE
)

for (series in year_series) {
  # GDP.per.capita is 0 for countries without a GDP match; skip those rows.
  has_gdp <- which(series$data$GDP.per.capita > 1)
  points(
    x = series$data$GDP.per.capita[has_gdp],
    y = series$data[[score_col]][has_gdp],
    pch = series$pch,
    cex = series$cex,
    col = series$col
  )
}

# The legend shows the same symbol and colour used for each year's points.
legend(
  x = "bottomright",
  bty = "n",
  legend = vapply(year_series, function(s) s$label, character(1)),
  col = vapply(year_series, function(s) s$col, character(1)),
  pch = vapply(year_series, function(s) s$pch, numeric(1)),
  title = "Year"
)

# Yes, there is a correlation! It seems that happiness index increases
# dramatically with GDP/capita increase for countries with low GDP/capita, but
# less so if the baseline GDP/capita is already higher. This suggests that
# happiness increases greatly when basic needs are better met, but there are
# diminishing returns for further increase in wealth.
# 5. Based on the 2020 data, how does the distribution of happiness scores vary across different regions of the world?
# Attach a continent to every 2020 country, then compare score distributions.

# The lookup below needs plain strings, so convert the continent column.
GNI2014$continent <- as.character(GNI2014$continent)

# Vectorised lookup by exact country name. Countries not present in GNI2014
# get "" and the "Seven seas (open ocean)" group is renamed "Other"; both are
# left out of the box plot.
continent_hit <- match(happy2020$Country.name, GNI2014$country)
continent <- GNI2014$continent[continent_hit]
continent[is.na(continent)] <- ""
continent[continent == "Seven seas (open ocean)"] <- "Other"
happy2020$Continent <- continent

regionalboxplot <- boxplot(
  `Life.evaluation.(3-year.average)` ~ Continent,
  data = happy2020[!(happy2020$Continent %in% c("", "Other")), ],
  frame.plot = FALSE,
  main = "Happiness Index by Region, 2020",
  xlab = "Region",
  ylab = "Happiness Index (3-year Average)",
  cex.axis = 0.85
)

# Europe, North America, and Oceania all score highly on the happiness index.
# South America closely follows, but is less variable than Europe and North
# America. Asia has a markedly lower median and high variability, and Africa
# is just generally low.
# 7. Create a treemap for the year 2020, using a color index to display the countries in order of numerical ranking.
# Treemap of the 2020 table: tiles grouped by continent, sized by the squared
# score and coloured by the score itself.
tree2020 <- happy2020
n_countries <- length(happy2020$Rank)

# Zero-based rank and a rank rescaled around 0. The treemap() call below does
# not read either column; they are kept so the table's layout is unchanged.
tree2020$Rank <- happy2020$Rank - 1
tree2020$Ranknorm <- -2 * (happy2020$Rank - n_countries / 2) / n_countries

# Squaring the score exaggerates the size differences between tiles.
tree2020$`Life.evaluation.(3-year.average)sq` <-
  tree2020$`Life.evaluation.(3-year.average)`^2

# NOTE(review): countries whose Continent is "" (no continent match) are not
# filtered out here, unlike in the box plot -- confirm that is intended.
treemap(
  dtf = tree2020,
  index = c("Continent", "Country.name"),
  vSize = "Life.evaluation.(3-year.average)sq",
  vColor = "Life.evaluation.(3-year.average)",
  type = "manual",
  range = c(2.5, 8),
  palette = c("red", "yellow3", "green3"),
  title = "Treemap of 2020 Happiness Data",
  title.legend = "Happiness Index"
)

# There are so many countries present that some don’t have their labels
# showing, as there is finite space in the plotting area to display the
# graphic. But, the general idea can be seen.
# 8. Create a choropleth map colored by the happiness score for different countries.
# Interactive world map coloured by the 2020 happiness score.

# Two-column table for plotly: country name and numeric score.
mapdata <- data.frame(
  country = happy2020$Country.name,
  value = as.numeric(happy2020$`Life.evaluation.(3-year.average)`)
)

# locationmode = "country names" makes plotly match regions by name, so a
# country only appears if its name in the data is one plotly recognises.
map <- plot_ly(
  data = mapdata,
  type = "choropleth",
  locationmode = "country names",
  locations = ~country,
  z = ~value,
  colors = c("red", "yellow3", "green3"),
  colorbar = list(title = "Happiness Index"),
  # Trace name; replaces the leftover placeholder "TEST".
  name = "Happiness Index",
  marker = list(line = list(color = "black", width = 0.8))
) |>
  layout(title = "2020 Happiness Index")
map

# Here is a choropleth map of the world based on happiness index scores in
# 2020. Notably, some data is missing: Greenland, Turkey, Somalia, and several
# other countries in Africa and the Caribbean are not present. But, the
# majority of the world is there.
# You can zoom in on this map, move around, and hover for country names and scores!