STAT 360 Project 1

Author

Griffin Lessinger

library(openxlsx)
library(dplyr)
library(treemap)
library(plotly)

# Import the data ----
# All three inputs live in the same directory, so the path is stated once;
# relocating the project then only requires editing this line.
data_dir <- "/home/user/School/STAT360/Project 1 (Happiness)/Data"

# Happiness table: later code reads Country.name, Year, Rank and
# `Life.evaluation.(3-year.average)` from its first four columns.
happy <- read.xlsx(file.path(data_dir, "WHR25_Data_Figure_2.1v3.xlsx"))

# GDP export: later code reads Country.or.Area, Year and Value.
gdp <- read.csv(file.path(data_dir, "UNdata_Export_20251123_014030794.csv"))

# load() restores saved objects into the workspace and returns their names.
# The continent lookup further down needs an object called GNI2014, so fail
# here with a clear message if the file did not provide it.
loaded_objects <- load(file.path(data_dir, "GNI2014.rda"))
stopifnot("GNI2014" %in% loaded_objects)

# Build one data frame per year (2017-2020). Only the first four columns of
# the happiness table are kept; rows are selected on the Year column.
happy2017 <- filter(happy[, 1:4], Year == 2017)
happy2018 <- filter(happy[, 1:4], Year == 2018)
happy2019 <- filter(happy[, 1:4], Year == 2019)
happy2020 <- filter(happy[, 1:4], Year == 2020)

# Look up GDP per capita for a set of countries in a single year.
#
# Arguments:
#   countries  Character vector of country names to look up.
#   gdp_data   Data frame with Country.or.Area, Year and Value columns.
#   year       The year whose values should be extracted.
#   overrides  Optional named numeric vector (names are country names).
#              These values replace whatever the lookup produced.
#
# Returns a vector aligned with `countries`. A country with no record for
# `year` gets the placeholder 0, which the scatter plot later filters out.
gdp_per_capita_for <- function(countries, gdp_data, year,
                               overrides = numeric(0)) {
  # which() drops rows whose Year is NA instead of producing NA rows.
  in_year <- gdp_data[which(gdp_data$Year == year), ]

  # match() handles every country in one pass and yields NA when there is
  # no record, so a missing year can no longer abort the assignment. If a
  # country appears more than once, the first row is used.
  hit <- match(countries, in_year$Country.or.Area)
  values <- in_year$Value[hit]
  values[is.na(hit)] <- 0

  # Apply hand-entered values last so they win over the lookup. Countries
  # named in `overrides` but absent from `countries` are simply ignored.
  patched <- countries %in% names(overrides)
  values[patched] <- overrides[countries[patched]]

  unname(values)
}

# The overrides below are hand-entered figures for three countries.
# NOTE(review): presumably these countries are spelled differently in the
# GDP export than in the happiness table - confirm against the data.
happy2017$GDP.per.capita <- gdp_per_capita_for(
  happy2017$Country.name, gdp, 2017,
  overrides = c(
    "Bolivia" = 3306.3066,
    "China" = 8716.2879,
    "United Kingdom" = 40392.7182
  )
)

happy2018$GDP.per.capita <- gdp_per_capita_for(
  happy2018$Country.name, gdp, 2018,
  overrides = c(
    "Bolivia" = 3500.7264,
    "China" = 9791.9798,
    "United Kingdom" = 43017.6149
  )
)

happy2019$GDP.per.capita <- gdp_per_capita_for(
  happy2019$Country.name, gdp, 2019,
  overrides = c(
    "Bolivia" = 3503.8960,
    "China" = 10031.4466,
    "United Kingdom" = 42487.9520
  )
)

happy2020$GDP.per.capita <- gdp_per_capita_for(
  happy2020$Country.name, gdp, 2020,
  overrides = c(
    "Bolivia" = 3099.9422,
    "China" = 10299.1942,
    "United Kingdom" = 40040.1466
  )
)

# Sort every yearly table by ascending Rank; later code relies on this when
# it takes rows 1:10 as the ten top-ranked countries.
happy2017 <- arrange(happy2017, Rank)
happy2018 <- arrange(happy2018, Rank)
happy2019 <- arrange(happy2019, Rank)
happy2020 <- arrange(happy2020, Rank)

1. What are the top 10 happiest countries in the most recent year available?

# Bar chart of the first ten rows of the 2020 table. Bars are drawn as the
# score minus 7 so that small differences between countries are visible;
# the y axis is then relabelled with the actual score values.
top10_2020 <- happy2020[1:10, ]
top10_offset <- top10_2020$`Life.evaluation.(3-year.average)` - 7

barplot2020 <- barplot(
  height = top10_offset,
  col = "skyblue",
  main = "Happiness Index, 2020",
  ylab = "Happiness Index (3-year Average)",
  ylim = c(-0.12, max(top10_offset) + 0.1),
  yaxt = "n"
)

# Country names, rotated and placed below the baseline (xpd lets the text
# extend outside the plot region).
text(
  x = barplot2020,
  y = -0.045,
  labels = top10_2020$Country.name,
  srt = 60,
  adj = 1,
  xpd = TRUE
)

# Score printed just above each bar, truncated to two decimals.
text(
  x = barplot2020,
  y = top10_offset + 0.04,
  labels = trunc(100 * top10_2020$`Life.evaluation.(3-year.average)`) / 100,
  cex = 0.8
)

# Tick every 0.1 above the baseline, labelling every other tick with the
# real score (offset + 7).
axis(
  side = 2,
  at = seq(0, 0.9, 0.1),
  labels = c("7.0", "", "7.2", "", "7.4", "", "7.6", "", "7.8", "")
)

In 2020, by a decent margin, Finland was the “happiest” country according to happiness index surveys. Next was Denmark, then Switzerland, with Austria rounding out the top 10.

2. How has the happiness score changed over the 4 years for China, India, USA, Indonesia, Japan, and Russia?

# Collect the 2017-2020 scores of six countries into a 4 x 6 matrix:
# one row per year, one column per country (the layout barplot() expects
# for grouped bars).
countries <- c(
  "China", "India", "United States",
  "Indonesia", "Japan", "Russian Federation"
)
yearly_tables <- list(happy2017, happy2018, happy2019, happy2020)

# match() returns NA when a country has no row in a given year, so that
# cell becomes NA instead of being dropped. Growing the result with rbind()
# would silently skip the empty lookup and shift every later value into
# the wrong year/country cell.
fouryear <- vapply(
  countries,
  function(country) {
    vapply(
      yearly_tables,
      function(tbl) {
        row <- match(country, tbl$Country.name)
        as.numeric(tbl$`Life.evaluation.(3-year.average)`[row])
      },
      numeric(1)
    )
  },
  numeric(length(yearly_tables)),
  # No dimnames: the plot below supplies its own axis labels.
  USE.NAMES = FALSE
)

# Grouped bar chart: one group of four bars (2017-2020) per country.
# Both default axes are suppressed and redrawn by hand below.
year_colours <- c("navy", "royalblue", "skyblue", "lightblue")

barplotcountries <- barplot(
  height = fouryear,
  beside = TRUE,
  col = year_colours,
  main = "Happiness Index, 2017 - 2020",
  ylab = "Happiness Index (3-year Average)",
  ylim = c(0, 7),
  xaxt = "n",
  yaxt = "n"
)

# One label per group of bars, at x = 3, 8, ..., 28.
axis(
  side = 1,
  at = seq(3, 28, by = 5),
  labels = c("China", "India", "USA", "Indonesia", "Japan", "Russia"),
  cex.axis = 1
)

# Integer ticks from 0 to 7 on the score axis.
axis(
  side = 2,
  at = 0:7,
  labels = as.character(0:7)
)

# Year key placed above the plotting region (y = 7.7 is beyond ylim, hence
# xpd = TRUE).
legend(
  x = 0,
  y = 7.7,
  legend = 2017:2020,
  fill = year_colours,
  xpd = TRUE,
  bty = "n"
)

In China, the USA, and Japan, the happiness index has remained roughly the same over the four years. In Indonesia, the index has improved noticeably. In India and Russia, the index has declined, though it is possibly rebounding in India.

3. Which countries have consistently ranked as the top 10 happiest countries across the 4 years?

# Countries that appear in the first ten rows of every yearly table.
# The tables are already sorted by Rank, so rows 1:10 are the top ten.
top_ten_by_year <- list(
  happy2017$Country.name[1:10],
  happy2018$Country.name[1:10],
  happy2019$Country.name[1:10],
  happy2020$Country.name[1:10]
)

# Fold intersect() over the four name vectors from left to right; the
# result keeps the order of the 2017 table.
happiest <- Reduce(intersect, top_ten_by_year)

happiest
[1] "Finland"     "Norway"      "Denmark"     "Iceland"     "Switzerland"
[6] "Netherlands" "New Zealand" "Sweden"     

4. Is there a correlation between GDP per capita and happiness score among countries?

# Scatter plot of GDP per capita against happiness score, with one marker
# shape and colour per year.
gdp_layers <- list(
  "2017" = list(data = happy2017, pch = 16, cex = 0.85, col = "darkred"),
  "2018" = list(data = happy2018, pch = 17, cex = 0.85, col = "forestgreen"),
  "2019" = list(data = happy2019, pch = 15, cex = 0.85, col = "#EEB422"),
  "2020" = list(data = happy2020, pch = 18, cex = 0.9, col = "royalblue")
)

# Draw the axes once on an empty canvas (type = "n"), then add each year
# with points(). This replaces stacking four full plot() calls with
# par(new = TRUE), which required repeating the limits on every call.
plot(
  x = 0,
  y = 0,
  type = "n",
  xlim = c(0, 90000),
  ylim = c(2.5, 8),
  xlab = "GDP per Capita ($US)",
  ylab = "Happiness Index (3-year Average)",
  main = "GDP per Capita vs. Happiness Index",
  frame.plot = FALSE
)

for (layer in gdp_layers) {
  # A GDP.per.capita of 0 marks "no GDP value found"; keep only real
  # values. which() also discards NA comparisons.
  known <- layer$data[which(layer$data$GDP.per.capita > 1), ]
  points(
    x = known$GDP.per.capita,
    y = known$`Life.evaluation.(3-year.average)`,
    pch = layer$pch,
    cex = layer$cex,
    col = layer$col
  )
}

# The legend shows the same marker shapes as the points, so the years can
# be told apart by shape as well as by colour.
legend(
  x = "bottomright",
  bty = "n",
  legend = names(gdp_layers),
  col = vapply(gdp_layers, function(layer) layer$col, character(1)),
  pch = vapply(gdp_layers, function(layer) layer$pch, numeric(1)),
  pt.cex = 1.2,
  title = "Year"
)

Yes, there is a correlation! It seems that the happiness index increases dramatically with GDP/capita for countries with low GDP/capita, but less so if the baseline GDP/capita is already higher. This suggests that happiness increases greatly when basic needs are better met, but that there are diminishing returns to further increases in wealth.

5. Based on the 2020 data, how does the distribution of happiness scores vary across different regions of the world?

# Attach a continent to every 2020 row by looking its country up in GNI2014.
# The continent column is converted to character so plain strings can be
# compared and assigned.
GNI2014$continent <- as.character(GNI2014$continent)

# match() looks up all countries at once and uses the first matching row,
# so a duplicated country in GNI2014 cannot break the assignment.
continent_row <- match(happy2020$Country.name, GNI2014$country)
matched_continent <- GNI2014$continent[continent_row]

# Countries without a match (or with a missing continent) get "", the
# marker the boxplot uses to leave them out.
matched_continent[is.na(matched_continent)] <- ""

# Fold the open-ocean category into "Other".
matched_continent[matched_continent == "Seven seas (open ocean)"] <- "Other"

happy2020$Continent <- matched_continent

# Box plot of 2020 happiness scores grouped by continent. Rows whose
# Continent is "" (no match found) or "Other" are left out.
has_region <- !(happy2020$Continent %in% c("", "Other"))

regionalboxplot <- boxplot(
  `Life.evaluation.(3-year.average)` ~ Continent,
  data = happy2020[has_region, ],
  frame.plot = FALSE,
  main = "Happiness Index by Region, 2020",
  xlab = "Region",
  # Capitalised to match the y-axis label used on the other plots.
  ylab = "Happiness Index (3-year Average)",
  cex.axis = 0.85
)

Europe, North America, and Oceania all score highly on the happiness index. South America closely follows, but is less variable than Europe and North America. Asia has a markedly lower median and high variability, and Africa is just generally low.

6. Based on the 2020 data, what is the relationship between social support and life expectancy in terms of their impact on happiness?

I looked for hours, I could not find the data to answer this! Wikipedia had the data on a table in the article, but because there were so many entries, I did not want to copy them by hand. I will omit this graphic for now. If you know a better place to access the data, or want me to copy the data by hand, I can do that. Just let me know!

7. Create a treemap for year 2020, using color index to display the countries in order of numerical ranking.

# Treemap input: a copy of the 2020 table with a few derived columns.
tree2020 <- happy2020

# NOTE(review): Rank and Ranknorm are not referenced by the treemap() call
# below - confirm whether they are still needed.
tree2020$Rank <- happy2020$Rank - 1
tree2020$Ranknorm <-
  -2 * (happy2020$Rank - nrow(happy2020) / 2) / nrow(happy2020)

# Squared score, used as the tile size.
tree2020$`Life.evaluation.(3-year.average)sq` <-
  tree2020$`Life.evaluation.(3-year.average)`^2

# Tiles are nested by continent, then country. Tile size comes from the
# squared score; tile colour comes from the score itself, mapped onto a
# fixed 2.5-8 range across a red / yellow / green palette.
treemap(
  dtf = tree2020,
  index = c("Continent", "Country.name"),
  vSize = "Life.evaluation.(3-year.average)sq",
  vColor = "Life.evaluation.(3-year.average)",
  type = "manual",
  palette = c("red", "yellow3", "green3"),
  range = c(2.5, 8),
  title = "Treemap of 2020 Happiness Data",
  title.legend = "Happiness Index"
)

There are so many countries present that some don’t have their labels showing, as there is finite space in the plotting area to display the graphic. But, the general idea can be seen.

8. Create a choropleth map colored by the happiness score for different countries

# Choropleth of 2020 happiness scores. Countries are matched to map shapes
# by name (locationmode = "country names"), so a country only appears if
# plotly recognises the spelling used in the happiness table.
mapdata <- data.frame(
  country = happy2020$Country.name,
  value = as.numeric(happy2020$`Life.evaluation.(3-year.average)`)
)

map <- plot_ly(
  data = mapdata,
  type = "choropleth",
  locationmode = "country names",
  locations = ~country,
  z = ~value,
  colors = c("red", "yellow3", "green3"),
  colorbar = list(title = "Happiness Index"),
  # Descriptive trace name replacing the leftover "TEST" placeholder.
  name = "Happiness Index",
  # Thin black outline around every country.
  marker = list(line = list(color = "black", width = 0.8))
) |>
  layout(title = "2020 Happiness Index")

map

Here is a choropleth map of the world based on happiness index scores in 2020. Notably, some data is missing: Greenland, Turkey, Somalia, and several other countries in Africa and the Caribbean are not present. But, the majority of the world is there.

You can zoom in on this map, move around, and hover for country names and scores!