Título Introducción Pregunta de investigación

El propósito de este ejercicio es llegar a comparar el IDH (ajustado) en términos de educación para Colombia vs. otra región o país.

# Load the data set that contains only the inequality-adjusted education index.
# read.csv2() reads ";"-separated files that use "," as the decimal mark.
Adjusted_Education <- read.csv2(
  file = "C:/Users/YANDAHAR/Downloads/Adjusted_Education.csv"
)

# Keep South America (Continent code 7) plus every Continent-code-5 country
# other than Canada and the United States (i.e. Central America).
new.data <- subset(
  Adjusted_Education,
  Continent == 7 |
    (Continent == 5 &
      country_name != "Canada" &
      country_name != "United States")
)

Seguidamente expondremos los datos geográficamente en un mapa de localización:

# Import the map polygon data set.
globeMap <- read.csv(file = "C:/Users/YANDAHAR/Downloads/globeMap.csv")

# Keep only the rows that belong to North America or South America.
new.datamap <- subset(
  globeMap,
  Continent_Name %in% c("North America", "South America")
)

# Drop Canada and the United States from the North American rows.
new.datamap1 <- subset(
  new.datamap,
  Country_Name != "Canada" & Country_Name != "United States of America"
)

# Relabel what is left of North America as Central America.
new.datamap1$Continent_Name <- replace(
  new.datamap1$Continent_Name,
  new.datamap1$Continent_Name == "North America",
  "Central America"
)

#instalar paquetes y llamar librerias
#install.packages("ggplot2")

library(ggplot2)
# Draw the selected regions as polygons, filled and outlined by continent name.
# Columns are referenced by bare name inside aes(): ggplot2 looks them up in the
# `data` argument, and writing `new.datamap1$long` there triggers the
# "Use of `data$column` is discouraged" warnings.
ggplot(new.datamap1, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Continent_Name, color = Continent_Name)) +
  scale_fill_manual(values = c("#596791", "#ca3355", "#acc11d", "#3b5c5d", "#f1e943", "#4146ca", "#e8737b", "#251141")) +
  labs(title = "Regions of the World", caption = "Empty territories have no index. Source Uknown", x = "Longitude", y = "Latitude") +
  theme_bw()

# Overview of the filtered data: number of observations and the type of each column.
str(new.data)
## 'data.frame':    204 obs. of  9 variables:
##  $ X             : int  41443 41448 41454 41457 41460 41470 41472 41476 41484 41485 ...
##  $ dimension     : chr  "Inequality" "Inequality" "Inequality" "Inequality" ...
##  $ indicator_id  : int  71406 71406 71406 71406 71406 71406 71406 71406 71406 71406 ...
##  $ indicator_name: chr  "Inequality-adjusted education index" "Inequality-adjusted education index" "Inequality-adjusted education index" "Inequality-adjusted education index" ...
##  $ iso3          : chr  "ARG" "BHS" "BLZ" "BOL" ...
##  $ country_name  : chr  "Argentina" "Bahamas" "Belize" "Bolivia (Plurinational State of)" ...
##  $ Continent     : int  7 5 5 7 7 7 7 5 5 7 ...
##  $ year          : chr  "X2010" "X2010" "X2010" "X2010" ...
##  $ value         : num  0.705 0.659 0.557 0.457 0.46 0.648 0.49 0.535 0.471 0.494 ...

Obtenemos 204 observaciones acerca de los países que conforman Centroamérica y Suramérica.

# Summary statistics (min, quartiles, median, mean, max) of the index values.
summary(new.data$value)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2370  0.4793  0.5350  0.5345  0.6248  0.7650

Con respecto al valor promedio del indicador de inequidad en educación para la región seleccionada, este se ubica en 0.5345, con valores que van desde un mínimo de 0.2370 hasta un máximo de 0.7650.

# Plot the distribution of the index values for the selected regions.
# The reference line is computed from the data instead of being copied by hand
# from the summary() output, so it stays correct if the data changes. The value
# that was hard-coded (0.5345) is the mean, so the label says "mean".
region_mean <- mean(new.data$value, na.rm = TRUE)

overallDist <- ggplot(new.data, aes(x = value)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(aes(y = after_stat(density)), binwidth = .005, colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  # A constant colour goes outside aes(); inside aes() the string "red" is
  # treated as a data value and mapped through the default colour scale.
  geom_vline(xintercept = region_mean, colour = "red") +
  # annotate() draws the label once; geom_text() with constant x/y redraws it
  # for every row of the data.
  annotate("text", x = region_mean, y = 0.7650, label = "region mean") +
  labs(title = "Region distribution of education inequality", subtitle = "Aggregated from 2010-2017", x = "Percentage of education inequality") +
  theme(legend.position = "none")
overallDist

Para el caso de la región seleccionada, podemos ver cómo los valores están concentrados en la media y en los valores más elevados, lo que se traduce en una alta desigualdad.

#graficaremos la distribucion del indicador para Colombia en contraste con las regiones estudiadas

#modificando la base para poder identificar a colombia
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Recode Continent to 0 for Colombia's rows so the country can be shown as its
# own group; every other row keeps its original Continent code.
new.data_despues <- mutate(
  new.data,
  Continent = ifelse(country_name == "Colombia", 0, Continent)
)

# Distribution of the index values, one panel per Continent code
# (0: Colombia, 5: Central America, 7: South America).
overallDist <- ggplot(new.data_despues, aes(x = value)) +
  # after_stat(density) replaces the deprecated ..density.. notation.
  geom_histogram(aes(y = after_stat(density)), binwidth = .045, colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  # A constant colour goes outside aes(); inside aes() the string "red" is
  # treated as a data value and mapped through the default colour scale.
  # NOTE(review): the line sits at 0.6 while the label below is anchored at
  # x = 0.5345 and reads "region median" - confirm which reference value is intended.
  geom_vline(xintercept = 0.6, colour = "red") +
  # annotate() draws the label once per panel; geom_text() with constant x/y
  # redraws it for every row of the data.
  annotate("text", x = 0.5345, y = 0.3, label = "region median") +
  facet_grid(as.character(Continent) ~ .) +
  labs(title = "Region distribution of education inequality", subtitle = "Aggregated from 2010-2017", caption = " 0:Colombia, 5:Central America, 7:South America", x = "Percentage of education inequality") +
  theme(legend.position = "none")
overallDist

Al comparar Colombia con las otras regiones, se evidencia la concentración de desigualdad en la educación de Colombia y de la región 7 (Suramérica), en la cual este país está ubicado; para el caso de la región 5 (Centroamérica), la desigualdad está menos concentrada.

# Build the heatmaps for Central America and South America.

# Split the filtered data by Continent code (5: Central America, 7: South America).
CentralAmerica <- subset(new.data, Continent == 5)
SouthAmerica <- subset(new.data, Continent == 7)

# Central America: one tile per country and year, coloured by the index value.
# The fill limits are fixed to 0.2-0.8.
heatmap <- ggplot(CentralAmerica, aes(x = country_name, y = year, fill = value)) +
  geom_tile() +
  scale_fill_viridis_c(option = "magma", limits = c(0.2, 0.8)) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "education inequality in Central America",
    subtitle = "From 2010-2017",
    caption = "Source: UN",
    x = "Country",
    y = "Year"
  )
heatmap

Para el caso de Centroamérica, es evidente la desigualdad en países como Haití, Guatemala y Honduras.

# South America: one tile per country and year, coloured by the index value.
# The fill limits are fixed to 0.2-0.8.
heatmap <- ggplot(SouthAmerica, aes(x = country_name, y = year, fill = value)) +
  geom_tile() +
  scale_fill_viridis_c(option = "magma", limits = c(0.2, 0.8)) +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    title = "education inequality in South America",
    subtitle = "From 2010-2017",
    caption = "Source: UN",
    x = "Country",
    y = "Year"
  )
heatmap

# Keep only the 2017 observations of the index.
new.datamap2 <- subset(new.data, year == "X2017")

# Rename the key columns by name rather than by position, so the rename still
# hits the right columns if the column order of the input ever changes.
names(new.datamap2)[names(new.datamap2) == "iso3"] <- "Three_Letter_Country_Code"
names(new.datamap2)[names(new.datamap2) == "X"] <- "ID"

# Attach the index values to the map polygons. The join key is stated
# explicitly instead of relying on whichever column names happen to be shared.
mapData <- full_join(new.datamap1, new.datamap2, by = "Three_Letter_Country_Code")

# Choropleth of the 2017 index values on the Central and South America map.
# The group aesthetic keeps the coordinates of each polygon together so that
# separate shapes are not connected to each other.
# The title and subtitle describe what is actually plotted: the data holds only
# the inequality-adjusted education index for the two selected regions.
myMap <- ggplot(mapData, aes(x = long, y = lat, group = as.factor(group))) +
  # polygons filled by the index value
  geom_polygon(aes(fill = value)) +
  # continuous colour palette
  scale_fill_viridis_c(option = "plasma") +
  # labels
  labs(title = "Inequality-adjusted education index for 2017", subtitle = "Central and South America", caption = "Grey territories have no index. Source UN", x = "Longitude", y = "Latitude") +
  # black-and-white theme
  theme_bw()
# plot the map
myMap