U2A2

María José Encinas, Rafael Gutiérrez, Carlos Alvarez, Paul Becerra

25/4/2021

U2A2: PIB Per Cápita (PPC) y su relación con el Grado de Escolaridad (GE).

Hipótesis de relación entre el PPC y el GE

Para este trabajo se plantea que el PPC influye en el GE; es decir, que el PIB per cápita de una entidad influye en su nivel de educación.

Librerías

# Working directory for the project.
# NOTE(review): hard-coded, machine-specific path — confirm it is still needed.
setwd("~/EALMV9")

# pacman supplies p_load(), used below for package management.
library(pacman)

# Packages needed to build this document (load order unchanged).
p_load(
  "base64enc", "htmltools", "mime", "xfun", "prettydoc",
  "readr", "knitr", "DT", "dplyr", "ggplot2",
  "plotly", "gganimate", "gifski", "scales", "readxl",
  "tidyverse", "cluster", "factoextra", "NbClust", "tidyr",
  "hpackedbubble"
)

Descarga este documento

# Embed the R Markdown source file in the rendered page as a download link.
xfun::embed_file(path = "U2A2.rmd")

Download U2A2.rmd

Descarga de datos

# Embed the CSV data file in the rendered page as a download link.
xfun::embed_file(path = "GEyPPC_PorEstado.csv")

Download GEyPPC_PorEstado.csv

Correlación

# Load the data set; the parser reports two double columns,
# GradoEscolaridad and PPC.
r <- read_csv(file = "GEyPPC_PorEstado.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   GradoEscolaridad = col_double(),
##   PPC = col_double()
## )
# Scatterplot matrix of every pair of columns.
pairs(x = r)

# Pearson correlation matrix (cor()'s default method, spelled out).
cor(x = r, method = "pearson")
##                  GradoEscolaridad       PPC
## GradoEscolaridad        1.0000000 0.5649916
## PPC                     0.5649916 1.0000000
# Scatter plot of log(GradoEscolaridad) against PPC.
# Both columns are referenced by bare name inside aes() so they are looked up
# in `data` (the original mixed a bare name with `r$...`), and the y-axis
# label now states the log transform that is actually plotted.
ggplot(data = r, mapping = aes(x = PPC, y = log(GradoEscolaridad))) +
  geom_point(colour = "red4") +
  ggtitle("Diagrama de dispersión") +
  xlab("PPC") +
  ylab("log(GE)") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  scale_y_continuous(labels = comma)

library(ppcor)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
# Pearson correlation test between PPC and the natural log of
# GradoEscolaridad. Pearson's r does not depend on the base of the logarithm,
# since logs in different bases differ only by a constant factor.
cor.test(
  x = r$PPC,
  y = log(r$GradoEscolaridad),
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  r$PPC and log(r$GradoEscolaridad)
## t = 3.6361, df = 30, p-value = 0.001027
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2532188 0.7559780
## sample estimates:
##       cor 
## 0.5530782

Significancia

# Two-sided Pearson correlation test (95% confidence interval) between PPC
# and log10(GradoEscolaridad).
cor.test(
  x = r$PPC, y = log10(r$GradoEscolaridad),
  method = "pearson", alternative = "two.sided", conf.level = 0.95
)
## 
##  Pearson's product-moment correlation
## 
## data:  r$PPC and log10(r$GradoEscolaridad)
## t = 3.6361, df = 30, p-value = 0.001027
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2532188 0.7559780
## sample estimates:
##       cor 
## 0.5530782

Análisis de normalidad

# Two panels per row: histograms first, then normal Q-Q plots.
par(mfrow = c(1, 2))

hist(
  r$GradoEscolaridad,
  xlab = "Grado de Escolaridad", main = "",
  border = "darkred", breaks = 10
)
hist(
  r$PPC,
  xlab = "PIB Per Cápita", main = "",
  border = "blue", breaks = 10
)

# Q-Q plot with reference line for each variable.
qqnorm(r$GradoEscolaridad, col = "darkred", main = "Grado Escolaridad")
qqline(r$GradoEscolaridad)

qqnorm(r$PPC, col = "blue", main = "PPC")
qqline(r$PPC)

# Back to a single-panel layout before running the test.
par(mfrow = c(1, 1))

# Shapiro-Wilk normality test on GradoEscolaridad.
shapiro.test(x = r$GradoEscolaridad)
## 
##  Shapiro-Wilk normality test
## 
## data:  r$GradoEscolaridad
## W = 0.95212, p-value = 0.1655
# Shapiro-Wilk normality test on PPC.
shapiro.test(x = r$PPC)
## 
##  Shapiro-Wilk normality test
## 
## data:  r$PPC
## W = 0.82057, p-value = 0.0001018
# Keep the single-panel layout, then repeat the Shapiro-Wilk test on the
# log10-transformed GradoEscolaridad.
# NOTE(review): the tests on the raw variables rejected normality for PPC
# (p ~ 1e-04), not for GradoEscolaridad (p ~ 0.17) — confirm the transform
# targets the intended variable.
par(mfrow = c(1, 1))
shapiro.test(x = log10(r$GradoEscolaridad))
## 
##  Shapiro-Wilk normality test
## 
## data:  log10(r$GradoEscolaridad)
## W = 0.93593, p-value = 0.05744

Regresión lineal

# Scatter plot of GradoEscolaridad against PPC with a fitted linear-model
# line (no confidence band).
# Columns are referenced by bare name inside aes() instead of `r$...`, so the
# mapping is resolved against `data`.
ggplot(data = r, mapping = aes(x = PPC, y = GradoEscolaridad)) +
  geom_point(color = "firebrick", size = 2) +
  labs(title = "GE ~ PPC", x = "PIB Per Cápita", y = "Grado Escolaridad") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title
## `geom_smooth()` using formula 'y ~ x'