0. Configuración
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(VIM)
## Cargando paquete requerido: colorspace
## Cargando paquete requerido: grid
## VIM is ready to use.
##
## Suggestions and bug-reports can be submitted at: https://github.com/statistikat/VIM/issues
##
## Adjuntando el paquete: 'VIM'
##
## The following object is masked from 'package:datasets':
##
## sleep
library(corrplot)
## corrplot 0.95 loaded
diccionario <- read_excel("Data Dictionary.xlsx")
tabla_variables <- diccionario %>%
select(`Associated SDG GOAL`, Code, Description) %>%
arrange(`Associated SDG GOAL`)
kable(tabla_variables, caption = "Variables y ODS asociados") %>%
kable_styling()
Variables y ODS asociados
|
Associated SDG GOAL
|
Code
|
Description
|
|
Classification types
|
Regime
|
Regime classified considering the competitiveness of access to power as
well as existence of liberal principles. Regime type is a weighted index
that considers a variety of political metrics.
|
|
Classification types
|
Income
|
World Bank assigns the world’s economies to four income groups—low,
lower-middle, upper-middle, and high-income countries.
|
|
Classification types
|
Region
|
World Region as classified by UN.
|
|
Classification types
|
Continent
|
Continent classification according to UN Convention
|
|
Economic General
|
NE.EXP.GNFS.ZS
|
Exports of goods and services represent the value of all goods and other
market services provided to the rest of the world
|
|
Economic General
|
NE.CON.TOTL.ZS
|
Sum of household final consumption expenditure (private consumption) and
general government final consumption expenditure (general government
consumption).
|
|
Economic General
|
NE.DAB.TOTL.ZS
|
Sum of household final consumption expenditure (formerly private
consumption), general government final consumption expenditure (formerly
general government consumption), and gross capital formation (formerly
gross domestic investment).
|
|
Economic General
|
NY.GDP.MKTP.CD
|
Current value (in USD) of total goods and services produced within a
country.
|
|
Economic General
|
NY.GNS.ICTR.ZS
|
Calculated as gross national income less total consumption, plus net
transfers
|
|
Economic General
|
NE.CON.GOVT.ZS
|
Final consumption expenditure (formerly general government consumption)
includes all government current expenditures for purchases of goods and
services (including compensation of employees).
|
|
Economic General
|
FP.CPI.TOTL.ZG
|
Rate at which prices of goods and services brought by conusmers rise or
fall. Estimated by using consumer price indices.
|
|
Economic General
|
NE.IMP.GNFS.ZS
|
Value of all goods and other market services received from the rest of
the world
|
|
Economic General
|
NE.TRD.GNFS.ZS
|
Proportion of total GDP that is comprised by Trade activity.
|
|
Explanatory factors
|
IT.NET.USER.ZS
|
Individuals who have used the Internet (from any location) in the last 3
months
|
|
Explanatory factors
|
GH.EM.IC.LUF
|
NA
|
|
Explanatory factors
|
SH.HIV.INCD.TL.P3
|
HIV rates per 1000 people
|
|
Explanatory factors
|
IT.NET.USER.ZS
|
% of countrys total population that has access to working internet
|
|
Explanatory factors
|
SP.DYN.LE00.IN
|
Average life expectancy for newborn babies
|
|
Explanatory factors
|
SP.POP.TOTL
|
Total population
|
|
Explanatory factors
|
NY.GDP.TOTL.RT.ZS
|
% Income accrued from natural resources (e.g. Exports) as a percentage
of GDP
|
|
Explanatory factors
|
SP.URB.TOTL.IN.ZS
|
What proportion of total population live in urban areas?
|
|
Explanatory factors
|
SP.RUR.TOTL.ZS
|
Percentage of a countries total population that occupy rural areas.
|
|
Goal 1. End poverty in all its forms everywhere
|
SP_ACS_BSRVH2O
|
Percentage of the population who have sustained access to basic water
drinking services.
|
|
Goal 1. End poverty in all its forms everywhere
|
SI_POV_DAY1
|
Percentage of the population living on less than $1.90 a day at 2011
international prices. The international poverty line is currently set at
$1.90 a day at 2011 international prices.
|
|
Goal 10. Reduce inequality within and among countries
|
SI.POV.GINI
|
Score awarded based on how equitably income is dispersed within a
country
|
|
Goal 10. Reduce inequality within and among countries
|
NY.ADJ.NNTY.PC.KD.ZG
|
Percentage yearly growth of adjusted net national income divided by
number of people in country
|
|
Goal 13. Take urgent action to combat climate change and its impacts
|
NY.ADJ.SVNX.GN.ZS
|
Measure that monitors whether savings and investment in a country
compensate for depreciation of natural and physical capital
|
|
Goal 13. Take urgent action to combat climate change and its impacts
|
NY.ADJ.DCO2.GN.ZS
|
Measure that monitors whether savings and investment in a country
compensate for depreciation of natural and physical capital
|
|
Goal 13. Take urgent action to combat climate change and its impacts
|
NY.ADJ.DRES.GN.ZS
|
Measure that monitors whether savings and investment in a country
compensate for depreciation of natural and physical capital
|
|
Goal 13. Take urgent action to combat climate change and its impacts
|
NY.ADJ.DPEM.GN.ZS
|
Measure that monitors whether savings and investment in a country
compensate for depreciation of natural and physical capital
|
|
Goal 13. Take urgent action to combat climate change and its impacts
|
NY.ADJ.DFOR.GN.ZS
|
Measure, calculated as the product of unit resource rents and the excess
of roundwood harvest over natural growth.
|
|
Goal 2. End hunger, achieve food security and improved nutrition and
promote sustainable agriculture
|
SN_ITK_DEFC
|
Percentage of the population whose habitual food consumption is
insufficient to provide the dietary energy levels that are required to
maintain a normal active and healthy life.
|
|
Goal 3. Ensure healthy lives and promote well-being for all at all ages
|
SP.DYN.LE00.IN
|
Value which captures the expectated age a newborn baby will live to
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.PRM.UNER.ZS
|
Percentage of primary-school-age children who are not enrolled in
primary or secondary school. Children in the official primary age group
that are in preprimary education should be considered out of school.
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.COM.DURS
|
Duration of compulsory education is the number of years that children
are legally obliged to attend school.
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.PRM.CMPT.ZS
|
Number of new entrants (enrollments minus repeaters) in the last grade
of primary education, regardless of age, divided by the population at
the entrance age for the last grade of primary education.
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.PRE.ENRR
|
Ratio of total enrollment, regardless of age, to the population of the
age group that officially corresponds to the level of education shown
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.PRM.ENRR
|
Ratio of total enrollment, regardless of age, to the population of the
age group that officially corresponds to the level of education shown
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.SEC.ENRR
|
Ratio of total enrollment, regardless of age, to the population of the
age group that officially corresponds to the level of education shown
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.SEC.ENRR.FE
|
Ratio of female enrollment, regardless of age, to the population of the
age group that officially corresponds to the level of education shown
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
SE.PRM.ENRL.TC.ZS
|
Average number of pupils per teacher in primary school.
|
|
Goal 5. Achieve gender equality and empower all women and girls
|
SG.GEN.PARL.ZS
|
Percentage of parliamentary seats in a single or lower chamber held by
women.
|
|
Goal 5. Achieve gender equality and empower all women and girls
|
SG.LAW.INDX
|
index measures how laws and regulations affect women’s economic
opportunity. Overall scores are calculated by taking the average score
of each of the eight areas (Going Places, Starting a Job, Getting Paid,
Getting Married, Having Children, Running a Business, Managing Assets
and Getting a Pension), with 100 representing the highest possible
score.
|
|
Goal 7. Ensure access to affordable, reliable, sustainable and modern
energy for all
|
EG.FEC.RNEW.ZS
|
Share of renewables energy in total final energy consumption.
|
|
Goal 7. Ensure access to affordable, reliable, sustainable and modern
energy for all
|
EG.ELC.RNEW.ZS
|
Share of electrity generated by renewable power plants in total
electricity generated by all types of plants.
|
|
Goal 8. Promote sustained, inclusive and sustainable economic growth,
full and productive employment and decent work for all
|
SL_TLF_UEM
|
Share of labour force without work but available and seeking employment
(male)
|
|
Goal 8. Promote sustained, inclusive and sustainable economic growth,
full and productive employment and decent work for all
|
SL_TLF_UEM
|
Share of labour force without work but available and seeking employment
(female)
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
IT_MOB_2GNTWK
|
Percentage of inhabitants living within range of a mobile-cellular
signal, irrespective of whether or not they are mobile phone subscribers
or users.
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
IT_MOB_3GNTWK
|
Percentage of inhabitants living within range of a mobile-cellular
signal, irrespective of whether or not they are mobile phone subscribers
or users.
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
EG.ELC.ACCS.ZS
|
Percentage of population with access to electricity. Electrification
data are collected from industry, national surveys and international
sources.
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
FB.ATM.TOTL.P5
|
Number of physical ATMs that allow clients to make financial
transactions in a public place.
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
IC.REG.COST.PC.FE.ZS
|
Cost to register a business is normalized by presenting it as a
percentage of the gross national income (GNI) per capita.
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
IC.REG.COST.PC.MA.ZS
|
Cost to register a business is normalized by presenting it as a
percentage of the gross national income (GNI) per capita.
|
tabla_ods <- diccionario %>%
select(`Associated SDG GOAL`) %>%
distinct() %>%
arrange(`Associated SDG GOAL`)
kable(tabla_ods, caption = "Listado de ODS") %>%
kable_styling()
Listado de ODS
|
Associated SDG GOAL
|
|
Classification types
|
|
Economic General
|
|
Explanatory factors
|
|
Goal 1. End poverty in all its forms everywhere
|
|
Goal 10. Reduce inequality within and among countries
|
|
Goal 13. Take urgent action to combat climate change and its impacts
|
|
Goal 2. End hunger, achieve food security and improved nutrition and
promote sustainable agriculture
|
|
Goal 3. Ensure healthy lives and promote well-being for all at all ages
|
|
Goal 4. Ensure inclusive and equitable quality education and promoted
lifelong learning oppurtunities for all
|
|
Goal 5. Achieve gender equality and empower all women and girls
|
|
Goal 7. Ensure access to affordable, reliable, sustainable and modern
energy for all
|
|
Goal 8. Promote sustained, inclusive and sustainable economic growth,
full and productive employment and decent work for all
|
|
Goal 9. Build resilient infrastructure, promote inclusive and
sustainable industrialization and foster innovation
|
datos <- read_csv("WorldSustainabilityDataset.csv")
## Rows: 3287 Columns: 54
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Country Name, Country Code, Continent, Income Classification (Worl...
## dbl (48): Year, Access to electricity (% of population) - EG.ELC.ACCS.ZS, Ad...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
datos <- datos %>%
rename(
Country = `Country Name`,
CountryCode = `Country Code`,
Income = `Income Classification (World Bank Definition)`,
Regime = `Regime Type (RoW Measure Definition)`,
Region = `World Regions (UN SDG Definition)`,
Continent = Continent
)
# Reemplazar los valores vacíos en columnas de tipo texto
datos <- datos %>%
mutate(across(where(is.character), ~na_if(., "")))
# Convertimos las variables cualitativas a factor
datos <- datos %>%
mutate(across(c(Country, CountryCode, Regime, Income, Region), as.factor))
str(datos)
## tibble [3,287 × 54] (S3: tbl_df/tbl/data.frame)
## $ Country : Factor w/ 173 levels "Albania","Algeria",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ CountryCode : Factor w/ 173 levels "ABW","AGO","ALB",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Year : num [1:3287] 2000 2001 2002 2003 2004 ...
## $ Access to electricity (% of population) - EG.ELC.ACCS.ZS : num [1:3287] 91.7 100 100 100 100 ...
## $ Adjusted net national income per capita (annual % growth) - NY.ADJ.NNTY.PC.KD.ZG : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Adjusted net savings, excluding particulate emission damage (% of GNI) - NY.ADJ.SVNX.GN.ZS : num [1:3287] 15.39 11.49 3.89 6.71 9.8 ...
## $ Adjusted savings: carbon dioxide damage (% of GNI) - NY.ADJ.DCO2.GN.ZS : num [1:3287] 2.39 2.51 2.72 2.73 2.68 ...
## $ Adjusted savings: natural resources depletion (% of GNI) - NY.ADJ.DRES.GN.ZS : num [1:3287] 0.00019 0.000214 0.000223 0.000243 0.000256 ...
## $ Adjusted savings: net forest depletion (% of GNI) - NY.ADJ.DFOR.GN.ZS : num [1:3287] 0.00019 0.000214 0.000223 0.000243 0.000256 ...
## $ Adjusted savings: particulate emission damage (% of GNI) - NY.ADJ.DPEM.GN.ZS : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Automated teller machines (ATMs) (per 100,000 adults) - FB.ATM.TOTL.P5 : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Broad money (% of GDP) - FM.LBL.BMNY.GD.ZS : num [1:3287] 51.8 53.6 58.5 61.2 57.3 ...
## $ Children out of school (% of primary school age) - SE.PRM.UNER.ZS : num [1:3287] 1.603 0.323 1.816 3.322 2.177 ...
## $ Compulsory education, duration (years) - SE.COM.DURS : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Cost of business start-up procedures, female (% of GNI per capita) - IC.REG.COST.PC.FE.ZS : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Cost of business start-up procedures, male (% of GNI per capita) - IC.REG.COST.PC.MA.ZS : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Exports of goods and services (% of GDP) - NE.EXP.GNFS.ZS : num [1:3287] 74.4 70.5 64.9 63.1 65.1 ...
## $ Final consumption expenditure (% of GDP) - NE.CON.TOTL.ZS : num [1:3287] 70.8 74.3 77.6 78.6 74.7 ...
## $ GDP (current US$) - NY.GDP.MKTP.CD : num [1:3287] 1.87e+09 1.92e+09 1.94e+09 2.02e+09 2.23e+09 ...
## $ GDP per capita (current US$) - NY.GDP.PCAP.CD : num [1:3287] 20618 20670 20437 20834 22568 ...
## $ General government final consumption expenditure (% of GDP) - NE.CON.GOVT.ZS : num [1:3287] 21.4 24.5 25.2 24.7 23.2 ...
## $ Gross national expenditure (% of GDP) - NE.DAB.TOTL.ZS : num [1:3287] 96.3 98.1 104.5 107.8 103.5 ...
## $ Gross savings (% of GDP) - NY.GNS.ICTR.ZS : num [1:3287] 23.7 19.3 12 14.5 17.1 ...
## $ Imports of goods and services (% of GDP) - NE.IMP.GNFS.ZS : num [1:3287] 70.7 68.5 69.4 70.9 68.6 ...
## $ Inflation, consumer prices (annual %) - FP.CPI.TOTL.ZG : num [1:3287] 4.04 2.88 3.32 3.66 2.53 ...
## $ Primary completion rate, total (% of relevant age group) - SE.PRM.CMPT.ZS : num [1:3287] 97.1 94.4 94.8 90.2 90.6 ...
## $ Proportion of seats held by women in national parliaments (%) - SG.GEN.PARL.ZS : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Pupil-teacher ratio, primary - SE.PRM.ENRL.TC.ZS : num [1:3287] 19.1 18.9 19.1 18.4 18.5 ...
## $ Renewable electricity output (% of total electricity output) - EG.ELC.RNEW.ZS : num [1:3287] 0 0 0 0 0 0 0 0 0 0 ...
## $ Renewable energy consumption (% of total final energy consumption) - EG.FEC.RNEW.ZS : num [1:3287] 0.175 0.181 0.181 0.185 0.187 ...
## $ School enrollment, preprimary (% gross) - SE.PRE.ENRR : num [1:3287] 95.9 97.6 98.4 99.6 98.7 ...
## $ School enrollment, primary (% gross) - SE.PRM.ENRR : num [1:3287] 111 109 111 109 111 ...
## $ School enrollment, secondary (% gross) - SE.SEC.ENRR : num [1:3287] 96.5 98 100.5 99.1 97.3 ...
## $ Trade (% of GDP) - NE.TRD.GNFS.ZS : num [1:3287] 145 139 134 134 134 ...
## $ Women Business and the Law Index Score (scale 1-100) - SG.LAW.INDX : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Prevalence of undernourishment (%) - SN_ITK_DEFC : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Proportion of population below international poverty line (%) - SI_POV_DAY1 : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Proportion of population covered by at least a 2G mobile network (%) - IT_MOB_2GNTWK : num [1:3287] NA NA NA NA NA 90 99 99 99 NA ...
## $ Proportion of population covered by at least a 3G mobile network (%) - IT_MOB_3GNTWK : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Proportion of population using basic drinking water services (%) - SP_ACS_BSRVH2O : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Unemployment rate, male (%) - SL_TLF_UEM : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Unemployment rate, women (%) - SL_TLF_UEM : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Annual production-based emissions of carbon dioxide (CO2) measured in million tonnes - GH.EM.IC.LUF: num [1:3287] 2.38 2.41 2.44 2.56 2.62 ...
## $ Continent : chr [1:3287] "North America" "North America" "North America" "North America" ...
## $ Gini index (World Bank estimate) - SI.POV.GINI : num [1:3287] NA NA NA NA NA NA NA NA NA NA ...
## $ Income : Factor w/ 4 levels "High income",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Individuals using the Internet (% of population) - IT.NET.USER.ZS : num [1:3287] NA 17.1 18.8 20.8 23 25.4 28 30.9 52 58 ...
## $ Life expectancy at birth, total (years) - SP.DYN.LE00.IN : num [1:3287] NA 73.9 73.9 74 74.2 ...
## $ Population, total - SP.POP.TOTL : num [1:3287] NA 92892 94992 97016 98744 ...
## $ Regime : Factor w/ 4 levels "Closed Autocracy",..: NA NA NA NA NA NA NA NA NA NA ...
## $ Rural population (% of total population) - SP.RUR.TOTL.ZS : num [1:3287] 53.3 53.7 54 54.4 54.8 ...
## $ Total natural resources rents (% of GDP) - NY.GDP.TOTL.RT.ZS : num [1:3287] NA 0.000712 0.000657 0.0007 0.000685 ...
## $ Urban population (% of total population) - SP.URB.TOTL.IN.ZS : num [1:3287] NA 46.3 46 45.6 45.2 ...
## $ Region : Factor w/ 7 levels "Central and Southern Asia",..: 4 4 4 4 4 4 4 4 4 4 ...
summary(datos$Income)
## High income Low income Lower-middle income Upper-middle income
## 899 753 868 765
## NA's
## 2
ggplot(datos, aes(x = `Gini index (World Bank estimate) - SI.POV.GINI`)) +
geom_histogram(binwidth = 2, fill = "steelblue") + labs(title = "Distribución del índice GINI", x = "GINI", y = "Frecuencia") + theme_minimal()
## Warning: Removed 1984 rows containing non-finite outside the scale range
## (`stat_bin()`).

ggplot(datos, aes(x = `Annual production-based emissions of carbon dioxide (CO2) measured in million tonnes - GH.EM.IC.LUF`)) + geom_histogram(binwidth = 50, fill = "darkgreen") + labs(title = "Distribución de emisiones GHE", x = "Emisiones", y = "Frecuencia") + theme_minimal()
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_bin()`).

datos_2018 <- datos %>% filter(Year == 2018)
top_contaminantes <- datos_2018 %>%
arrange(desc(`Annual production-based emissions of carbon dioxide (CO2) measured in million tonnes - GH.EM.IC.LUF`)) %>%
select(Country, `Annual production-based emissions of carbon dioxide (CO2) measured in million tonnes - GH.EM.IC.LUF`) %>%
head(10)
kable(top_contaminantes, caption = "Top países contaminantes en 2018") %>%
kable_styling()
Top países contaminantes en 2018
|
Country
|
Annual production-based emissions of carbon dioxide (CO2) measured in
million tonnes - GH.EM.IC.LUF
|
|
China
|
9956.569
|
|
United States
|
5424.882
|
|
India
|
2591.324
|
|
Russian Federation
|
1691.360
|
|
Japan
|
1135.688
|
|
Iran, Islamic Rep.
|
755.402
|
|
Germany
|
755.362
|
|
Korea, Rep.
|
634.934
|
|
Canada
|
586.505
|
|
Saudi Arabia
|
576.758
|
# 1. Renombrar las variables con etiquetas cortas para el corrplot
datos_cor_limpio <- datos %>%
select(
Pobreza_Int = `Proportion of population below international poverty line (%) - SI_POV_DAY1`,
GINI = `Gini index (World Bank estimate) - SI.POV.GINI`,
Esperanza_Vida = `Life expectancy at birth, total (years) - SP.DYN.LE00.IN`,
Ninos_Sin_Escuela = `Children out of school (% of primary school age) - SE.PRM.UNER.ZS`,
PIB_Total = `GDP (current US$) - NY.GDP.MKTP.CD`
) %>%
na.omit()
cor_matrix_limpio <- cor(datos_cor_limpio)
# 2. Generar corrplot más legible y visual
corrplot(cor_matrix_limpio,
method = "color",
tl.cex = 0.9, # Tamaño de la etiqueta del texto
addCoef.col = "black", # Añadir valores de correlación
type = "upper", # Mostrar solo la mitad superior
diag = FALSE, # Eliminar la diagonal
order = "hclust", # Reordenar por clústeres
title = "Matriz de Correlación de Variables Clave (Etiquetas Cortas)", # Título
mar=c(0,0,1,0) # Ajuste de márgenes para el título
)

correlaciones <- datos %>%
select(where(is.numeric)) %>%
summarise(across(everything(), ~cor(., datos$`Life expectancy at birth, total (years) - SP.DYN.LE00.IN`, use = "complete.obs"))) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Correlación") %>%
arrange(desc(Correlación))
kable(correlaciones, caption = "Correlación con esperanza de vida") %>%
kable_styling()
Correlación con esperanza de vida
|
Variable
|
Correlación
|
|
Life expectancy at birth, total (years) - SP.DYN.LE00.IN
|
1.0000000
|
|
Access to electricity (% of population) - EG.ELC.ACCS.ZS
|
0.8462019
|
|
School enrollment, secondary (% gross) - SE.SEC.ENRR
|
0.8062534
|
|
Proportion of population using basic drinking water services (%) -
SP_ACS_BSRVH2O
|
0.7659222
|
|
Primary completion rate, total (% of relevant age group) -
SE.PRM.CMPT.ZS
|
0.7280693
|
|
Individuals using the Internet (% of population) - IT.NET.USER.ZS
|
0.7179673
|
|
School enrollment, preprimary (% gross) - SE.PRE.ENRR
|
0.6812603
|
|
Urban population (% of total population) - SP.URB.TOTL.IN.ZS
|
0.6443750
|
|
Proportion of population covered by at least a 3G mobile network (%) -
IT_MOB_3GNTWK
|
0.6221733
|
|
Automated teller machines (ATMs) (per 100,000 adults) - FB.ATM.TOTL.P5
|
0.6062708
|
|
GDP per capita (current US\() - NY.GDP.PCAP.CD
</td>
<td style="text-align:right;"> 0.5862631 </td>
</tr>
<tr>
<td style="text-align:left;"> Proportion of
population covered by at least a 2G mobile network (%) - IT_MOB_2GNTWK
</td>
<td style="text-align:right;"> 0.5778520 </td>
</tr>
<tr>
<td style="text-align:left;"> Broad money (% of GDP)
- FM.LBL.BMNY.GD.ZS </td>
<td style="text-align:right;"> 0.5550244 </td>
</tr>
<tr>
<td style="text-align:left;"> Women Business and the
Law Index Score (scale 1-100) - SG.LAW.INDX </td>
<td style="text-align:right;"> 0.4016865 </td>
</tr>
<tr>
<td style="text-align:left;"> Compulsory education,
duration (years) - SE.COM.DURS </td>
<td style="text-align:right;"> 0.3811764 </td>
</tr>
<tr>
<td style="text-align:left;"> Exports of goods and
services (% of GDP) - NE.EXP.GNFS.ZS </td>
<td style="text-align:right;"> 0.3241664 </td>
</tr>
<tr>
<td style="text-align:left;"> Trade (% of GDP) -
NE.TRD.GNFS.ZS </td>
<td style="text-align:right;"> 0.2637263 </td>
</tr>
<tr>
<td style="text-align:left;"> Adjusted net savings,
excluding particulate emission damage (% of GNI) - NY.ADJ.SVNX.GN.ZS
</td>
<td style="text-align:right;"> 0.2571140 </td>
</tr>
<tr>
<td style="text-align:left;"> Proportion of seats
held by women in national parliaments (%) - SG.GEN.PARL.ZS </td>
<td style="text-align:right;"> 0.2189707 </td>
</tr>
<tr>
<td style="text-align:left;"> GDP (current
US\)) - NY.GDP.MKTP.CD
|
0.2021001
|
|
Year
|
0.1939736
|
|
Imports of goods and services (% of GDP) - NE.IMP.GNFS.ZS
|
0.1704533
|
|
General government final consumption expenditure (% of GDP) -
NE.CON.GOVT.ZS
|
0.1428950
|
|
Annual production-based emissions of carbon dioxide (CO2) measured in
million tonnes - GH.EM.IC.LUF
|
0.1242841
|
|
Gross savings (% of GDP) - NY.GNS.ICTR.ZS
|
0.1242556
|
|
School enrollment, primary (% gross) - SE.PRM.ENRR
|
0.0600071
|
|
Population, total - SP.POP.TOTL
|
0.0116152
|
|
Adjusted net national income per capita (annual % growth) -
NY.ADJ.NNTY.PC.KD.ZG
|
-0.0191602
|
|
Adjusted savings: carbon dioxide damage (% of GNI) - NY.ADJ.DCO2.GN.ZS
|
-0.0364124
|
|
Unemployment rate, male (%) - SL_TLF_UEM
|
-0.0675769
|
|
Unemployment rate, women (%) - SL_TLF_UEM
|
-0.1135456
|
|
Inflation, consumer prices (annual %) - FP.CPI.TOTL.ZG
|
-0.1911438
|
|
Adjusted savings: natural resources depletion (% of GNI) -
NY.ADJ.DRES.GN.ZS
|
-0.2593173
|
|
Gross national expenditure (% of GDP) - NE.DAB.TOTL.ZS
|
-0.2665952
|
|
Renewable electricity output (% of total electricity output) -
EG.ELC.RNEW.ZS
|
-0.2789410
|
|
Total natural resources rents (% of GDP) - NY.GDP.TOTL.RT.ZS
|
-0.3014496
|
|
Final consumption expenditure (% of GDP) - NE.CON.TOTL.ZS
|
-0.3047017
|
|
Adjusted savings: net forest depletion (% of GNI) - NY.ADJ.DFOR.GN.ZS
|
-0.3320652
|
|
Gini index (World Bank estimate) - SI.POV.GINI
|
-0.3529437
|
|
Cost of business start-up procedures, male (% of GNI per capita) -
IC.REG.COST.PC.MA.ZS
|
-0.5000100
|
|
Cost of business start-up procedures, female (% of GNI per capita) -
IC.REG.COST.PC.FE.ZS
|
-0.5000397
|
|
Prevalence of undernourishment (%) - SN_ITK_DEFC
|
-0.5736113
|
|
Rural population (% of total population) - SP.RUR.TOTL.ZS
|
-0.6443750
|
|
Children out of school (% of primary school age) - SE.PRM.UNER.ZS
|
-0.6612297
|
|
Renewable energy consumption (% of total final energy consumption) -
EG.FEC.RNEW.ZS
|
-0.6950236
|
|
Proportion of population below international poverty line (%) -
SI_POV_DAY1
|
-0.7651884
|
|
Adjusted savings: particulate emission damage (% of GNI) -
NY.ADJ.DPEM.GN.ZS
|
-0.7923531
|
|
Pupil-teacher ratio, primary - SE.PRM.ENRL.TC.ZS
|
-0.7934361
|
10. Relación entre pobreza y acceso a educación
## Gráfico limpio entre PIB per cápita y pobreza (con filtro de pobreza)
# Filtrar datos del último año, crear etiquetas cortas y ELIMINAR EXTREMOS DE POBREZA (Ej. > 60%)
datos_final <- datos %>%
filter(Year == max(Year),
# Aplicar filtro para valores extremos de pobreza (ej. limitar a 60%)
`Proportion of population below international poverty line (%) - SI_POV_DAY1` <= 60
) %>%
mutate(Etiqueta = CountryCode)
# Crear gráfico con mejoras visuales (temas, línea de tendencia)
grafico_pib_pobreza <- ggplot(datos_final, aes(
x = `GDP per capita (current US$) - NY.GDP.PCAP.CD`,
y = `Proportion of population below international poverty line (%) - SI_POV_DAY1`,
color = Region)) +
geom_point(alpha = 0.8, size = 3.5) + # Puntos más grandes
geom_smooth(method = "lm", se = FALSE, color = "darkred", linetype = "dashed") + # Añadir línea de tendencia
geom_text(data = datos_final %>%
# Etiquetar países con pobreza > 30% para resaltarlos
filter(`Proportion of population below international poverty line (%) - SI_POV_DAY1` > 30 |
`GDP per capita (current US$) - NY.GDP.PCAP.CD` > 100000),
aes(label = Etiqueta),
size = 3.5, vjust = -0.8, check_overlap = TRUE) +
scale_x_log10(labels = scales::dollar) + # Escala logarítmica con formato de dólar
labs(
title = "Relación entre PIB per cápita y pobreza (Valores Extremos Filtrados)",
subtitle = "El filtro elimina países con Población bajo pobreza internacional > 60%",
x = "Log(PIB per cápita en US$)",
y = "Población bajo la línea de pobreza (%)",
color = "Región Mundial") +
theme_bw() + # Tema limpio
theme(plot.title = element_text(face = "bold"),
legend.position = "bottom")
print(grafico_pib_pobreza)
## `geom_smooth()` using formula = 'y ~ x'

# todo el código aquí
datos_final <- datos %>% filter(Year == max(Year))
tendencia <- datos_final %>%
group_by(Region) %>%
summarise(
Media_Pobreza = mean(`Proportion of population below international poverty line (%) - SI_POV_DAY1`, na.rm = TRUE),
Mediana_Pobreza = median(`Proportion of population below international poverty line (%) - SI_POV_DAY1`, na.rm = TRUE),
Media_GINI = mean(`Gini index (World Bank estimate) - SI.POV.GINI`, na.rm = TRUE),
Mediana_GINI = median(`Gini index (World Bank estimate) - SI.POV.GINI`, na.rm = TRUE),
Media_Esperanza = mean(`Life expectancy at birth, total (years) - SP.DYN.LE00.IN`, na.rm = TRUE),
Mediana_Esperanza = median(`Life expectancy at birth, total (years) - SP.DYN.LE00.IN`, na.rm = TRUE)
)
dispersion <- datos_final %>%
group_by(Region) %>%
summarise(
SD_Pobreza = sd(`Proportion of population below international poverty line (%) - SI_POV_DAY1`, na.rm = TRUE),
MAD_Pobreza = mad(`Proportion of population below international poverty line (%) - SI_POV_DAY1`, na.rm = TRUE),
SD_GINI = sd(`Gini index (World Bank estimate) - SI.POV.GINI`, na.rm = TRUE),
MAD_GINI = mad(`Gini index (World Bank estimate) - SI.POV.GINI`, na.rm = TRUE),
SD_Esperanza = sd(`Life expectancy at birth, total (years) - SP.DYN.LE00.IN`, na.rm = TRUE),
MAD_Esperanza = mad(`Life expectancy at birth, total (years) - SP.DYN.LE00.IN`, na.rm = TRUE)
)
kable(tendencia, caption = "Medidas de tendencia central") %>%
kable_styling()
Medidas de tendencia central
|
Region
|
Media_Pobreza
|
Mediana_Pobreza
|
Media_GINI
|
Mediana_GINI
|
Media_Esperanza
|
Mediana_Esperanza
|
|
Central and Southern Asia
|
1.3750000
|
0.55
|
32.27500
|
29.7
|
72.47567
|
71.51650
|
|
Eastern and South-Eastern Asia
|
3.1000000
|
2.25
|
37.28333
|
37.1
|
75.41964
|
75.51950
|
|
Europe and Northern America
|
0.4483871
|
0.20
|
31.00323
|
30.2
|
79.14412
|
80.89268
|
|
Latin America and Caribbean
|
3.2071429
|
1.70
|
45.39286
|
45.4
|
74.90558
|
74.99200
|
|
Northern Africa and Western Asia
|
1.1800000
|
0.00
|
34.28000
|
34.4
|
76.02221
|
76.45300
|
|
Oceania
|
NaN
|
NA
|
NaN
|
NA
|
74.31789
|
71.81800
|
|
Sub-Saharan Africa
|
33.1250000
|
41.05
|
38.55000
|
35.4
|
62.58853
|
62.97300
|
kable(dispersion, caption = "Medidas de dispersión") %>%
kable_styling()
Medidas de dispersión
|
Region
|
SD_Pobreza
|
MAD_Pobreza
|
SD_GINI
|
MAD_GINI
|
SD_Esperanza
|
MAD_Esperanza
|
|
Central and Southern Asia
|
2.0336748
|
0.44478
|
6.732694
|
2.89107
|
3.313894
|
1.980754
|
|
Eastern and South-Eastern Asia
|
3.6353817
|
2.29803
|
3.225782
|
2.29803
|
6.164585
|
8.732514
|
|
Europe and Northern America
|
0.7334262
|
0.29652
|
4.609663
|
4.44780
|
3.506372
|
3.609661
|
|
Latin America and Caribbean
|
3.9118094
|
1.70499
|
4.352977
|
4.81845
|
3.305274
|
2.680541
|
|
Northern Africa and Western Asia
|
1.9524344
|
0.00000
|
5.779879
|
2.96520
|
3.130638
|
3.036365
|
|
Oceania
|
NA
|
NA
|
NA
|
NA
|
6.436534
|
4.427044
|
|
Sub-Saharan Africa
|
22.2036596
|
8.00604
|
8.644652
|
2.66868
|
5.027565
|
4.400357
|