# CLEANED DATASET: data_imp.csv
#
# Descriptive statistics for the cleaned + imputed + merged dataset.
# Produces overall, per-region and per-continent summaries of the two main
# variables:
#   - che_gdp  : current health expenditure as % of GDP
#   - lab_prod : labor productivity proxy (rgdpo / emp)

# Setup ----

# countrycode is used to map each country in "data_imp.csv" to a region and a
# continent so results can be grouped. Install it only when it is missing, so
# the script does not hit the network and reinstall on every run.
if (!requireNamespace("countrycode", quietly = TRUE)) {
  install.packages("countrycode")
}

library(countrycode)
library(dplyr)
library(tidyverse)

# Load data ----

# Load the cleaned + imputed + merged data (data_imp.csv), read relative to
# the current working directory.
data_imp <- read.csv("data_imp.csv")

# Quick check that the data loaded and that the columns have the expected
# types. For numeric columns summary() reports min, quartiles, median, mean
# and max (it does not report the mode or the standard deviation).
str(data_imp)
summary(data_imp)

# Overall statistics ----

# Mean, median and sd of the two main variables over the whole dataset.
# na.rm = TRUE ensures missing values are ignored even though the dataset is
# expected to be clean already.
overall_stats <- data_imp %>%
  summarise(
    # Health expenditure (% of GDP)
    mean_che_gdp = mean(che_gdp, na.rm = TRUE),
    median_che_gdp = median(che_gdp, na.rm = TRUE),
    sd_che_gdp = sd(che_gdp, na.rm = TRUE),
    # Labor productivity (rgdpo / emp)
    mean_lab_prod = mean(lab_prod, na.rm = TRUE),
    median_lab_prod = median(lab_prod, na.rm = TRUE),
    sd_lab_prod = sd(lab_prod, na.rm = TRUE)
  )
overall_stats

# Region / continent lookup ----

# Add region and continent columns by matching the country name with
# countrycode. Names that countrycode cannot match produce NA (countrycode
# emits a warning listing them); such rows end up in an NA group in the
# grouped summaries below.
data_imp <- data_imp %>%
  mutate(
    region = countrycode(country, "country.name", "region"),
    continent = countrycode(country, "country.name", "continent")
  )

# Grouped statistics ----

#' Summarise che_gdp and lab_prod within each level of a grouping column
#'
#' @param data A data frame containing `che_gdp`, `lab_prod` and the
#'   grouping column.
#' @param group_var Unquoted name of the column to group by.
#' @return One row per group with the row count `n` and the mean, median and
#'   sd of `che_gdp` (`*_che`) and `lab_prod` (`*_prod`), ordered by
#'   descending mean labor productivity for easier interpretation.
summarise_by_group <- function(data, group_var) {
  data %>%
    group_by({{ group_var }}) %>%
    summarise(
      n = n(),
      mean_che = mean(che_gdp, na.rm = TRUE),
      median_che = median(che_gdp, na.rm = TRUE),
      sd_che = sd(che_gdp, na.rm = TRUE),
      mean_prod = mean(lab_prod, na.rm = TRUE),
      median_prod = median(lab_prod, na.rm = TRUE),
      sd_prod = sd(lab_prod, na.rm = TRUE)
    ) %>%
    arrange(desc(mean_prod))
}

# Mean, median and sd by REGION.
region_stats <- summarise_by_group(data_imp, region)
region_stats

# Mean, median and sd by CONTINENT, as an alternative grouping.
continent_stats <- summarise_by_group(data_imp, continent)
continent_stats