library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Import the HCI export from Downloads. Every column is read as text so the
# raw cell contents are kept as-is, then the headers are normalised to
# snake_case by janitor.
hci_raw <- "/Users/yj/Downloads/HCI1.csv" %>%
  read_csv(
    col_types = cols(.default = "c"),
    locale = locale(encoding = "UTF-8")
  ) %>%
  clean_names()

# Inspect the column names, types and first values
glimpse(hci_raw)
## Rows: 7
## Columns: 15
## $ series_name <chr> "Human Capital Index (HCI) (scale 0-1)", "Human Capital I…
## $ series_code <chr> "HD.HCI.OVRL", "HD.HCI.OVRL", NA, NA, NA, NA, NA
## $ country_name <chr> "Italy", "Israel", NA, NA, NA, NA, NA
## $ country_code <chr> "ITA", "ISR", NA, NA, NA, NA, NA
## $ x2010_yr2010 <chr> "0.750265836715698", "0.718269050121307", NA, NA, NA, NA,…
## $ x2011_yr2011 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2012_yr2012 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2013_yr2013 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2014_yr2014 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2015_yr2015 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2016_yr2016 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2017_yr2017 <chr> "0.769", "0.763", NA, NA, NA, NA, NA
## $ x2018_yr2018 <chr> "0.752880394458771", "0.763064444065094", NA, NA, NA, NA,…
## $ x2019_yr2019 <chr> "..", "..", NA, NA, NA, NA, NA
## $ x2020_yr2020 <chr> "0.727819681167603", "0.733995318412781", NA, NA, NA, NA,…
library(dplyr)
library(tidyr)
# Reshape the wide year columns into one row per series / country / year,
# convert the values to numbers, and keep Israel and Italy for 2010-2020.
hci_long <- hci_raw %>%
  pivot_longer(
    cols = starts_with("x"), # year columns after clean_names(), e.g. x2010_yr2010
    names_to = "year",
    values_to = "hci"
  ) %>%
  mutate(
    # "x2010_yr2010" -> 2010: strip the leading "x" and the "_yr..." suffix
    year = as.integer(gsub("^x|_yr.*$", "", year)),
    # Missing observations are written as ".." in the file. Map that sentinel
    # to NA explicitly so as.numeric() stays silent for expected gaps and any
    # remaining coercion warning points at a genuinely malformed value.
    hci = as.numeric(na_if(hci, ".."))
  ) %>%
  # %in% is FALSE for NA, so rows with a missing country_name are dropped too
  filter(
    country_name %in% c("Israel", "Italy"),
    between(year, 2010L, 2020L)
  )
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `hci = as.numeric(hci)`.
## Caused by warning:
## ! NAs introduced by coercion
# Mean HCI per country, ignoring years without an observation
avg_hci <- summarise(
  group_by(hci_long, country_name),
  avg_hci = mean(hci, na.rm = TRUE)
)
avg_hci
## # A tibble: 2 × 2
## country_name avg_hci
## <chr> <dbl>
## 1 Israel 0.745
## 2 Italy 0.750
# Line chart of HCI over time, one coloured line per country.
library(ggplot2)
# Rows with a missing hci are dropped before plotting: geom_line() breaks the
# path at every NA, so with gaps between observed years most points would be
# left unconnected, and geom_point() would warn about the removed rows.
ggplot(
  tidyr::drop_na(hci_long, hci),
  aes(x = year, y = hci, color = country_name)
) +
  # `linewidth` replaces `size` for line geoms since ggplot2 3.4.0
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  # Years are whole numbers; pin the breaks so the default breaks cannot land
  # on fractional values
  scale_x_continuous(breaks = seq(2010, 2020, by = 2)) +
  labs(
    title = "Human Capital Index (2010–2020): Israel vs Italy",
    x = "Year", y = "HCI (0–1)", color = "Country"
  ) +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.