U2A2: PIB Per Cápita (PPC) y su relación con el Grado de Escolaridad (GE).
Hipótesis de la relación entre el PIB per cápita y el grado de escolaridad
Para este trabajo se plantea la hipótesis de que el PPC influye en el GE; es decir, que el PPC de una entidad es el que influye en su nivel de educación.
Librerías
# Working directory.
# NOTE(review): this path is hard-coded and machine-specific; the relative
# CSV path read further down is resolved against it.
setwd("~/EALMV9")

# Attach "pacman", which is used here for package management.
library("pacman")

# Packages needed to build this document.
p_load(
  "base64enc", "htmltools", "mime", "xfun", "prettydoc", "readr", "knitr",
  "DT", "dplyr", "ggplot2", "plotly", "gganimate", "gifski", "scales",
  "readxl", "tidyverse", "cluster", "factoextra", "NbClust", "tidyr",
  "hpackedbubble"
)
Correlación
# Read the dataset into `r`. The column specification echoed below reports two
# double columns: GradoEscolaridad and PPC.
r <- readr::read_csv(file = "GEyPPC_PorEstado.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## GradoEscolaridad = col_double(),
## PPC = col_double()
## )
# Scatterplot matrix of every pair of columns in `r`.
pairs(r)

# Correlation matrix of the columns of `r` (cor() defaults to Pearson).
cor(r)
##                  GradoEscolaridad       PPC
## GradoEscolaridad        1.0000000 0.5649916
## PPC                     0.5649916 1.0000000
# Scatter plot of the natural log of GradoEscolaridad against PPC.
# Columns are referenced by bare name inside aes() because `data = r` is
# already supplied; the y-axis label states the log transform that is plotted.
ggplot(data = r, aes(x = PPC, y = log(GradoEscolaridad))) +
  geom_point(colour = "red4") +
  ggtitle("Diagrama de dispersión") +
  xlab("PPC") +
  ylab("log(GE)") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(labels = comma)

# Attaching ppcor also attaches MASS, whose select() masks the plotly and
# dplyr versions (see the messages below); use dplyr::select() explicitly
# after this point.
# NOTE(review): nothing from ppcor is called in the visible part of this
# script -- confirm it is still needed.
library(ppcor)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
# Pearson correlation test between PPC and the natural log of
# GradoEscolaridad; the printed result follows.
cor.test(
  x = r$PPC,
  y = log(r$GradoEscolaridad),
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: r$PPC and log(r$GradoEscolaridad)
## t = 3.6361, df = 30, p-value = 0.001027
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2532188 0.7559780
## sample estimates:
## cor
## 0.5530782
Significancia
# Two-sided Pearson correlation test at the 95% confidence level, this time
# with a base-10 log of GradoEscolaridad. log10() differs from log() only by a
# constant factor, so the statistics printed below match the previous test.
cor.test(
  x = r$PPC,
  y = log10(r$GradoEscolaridad),
  method = "pearson",
  alternative = "two.sided",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: r$PPC and log10(r$GradoEscolaridad)
## t = 3.6361, df = 30, p-value = 0.001027
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2532188 0.7559780
## sample estimates:
## cor
## 0.5530782
Análisis de normalidad
# Draw two panels per page and remember the previous graphics settings so
# they can be restored afterwards.
old_par <- par(mfrow = c(1, 2))

# Histograms of both variables.
hist(r$GradoEscolaridad, breaks = 10, main = "",
     xlab = "Grado de Escolaridad", border = "darkred")
hist(r$PPC, breaks = 10, main = "",
     xlab = "PIB Per Cápita", border = "blue")

# Normal Q-Q plots, each with its reference line.
qqnorm(r$GradoEscolaridad, main = "Grado Escolaridad", col = "darkred")
qqline(r$GradoEscolaridad)
qqnorm(r$PPC, main = "PPC", col = "blue")
qqline(r$PPC)

# Restore the graphics settings that were active before this chunk.
par(old_par)

# Shapiro-Wilk normality test on GradoEscolaridad. The p-value printed below
# (0.1655) is above 0.05, so normality is not rejected at the 5% level.
shapiro.test(r$GradoEscolaridad)
##
## Shapiro-Wilk normality test
##
## data: r$GradoEscolaridad
## W = 0.95212, p-value = 0.1655
# Shapiro-Wilk normality test on PPC. The p-value printed below (0.0001018)
# is below 0.05, so normality is rejected at the 5% level.
# NOTE(review): PPC is the variable that fails this test, yet the log
# transform in the correlation tests is applied to GradoEscolaridad --
# confirm that this is intended.
shapiro.test(r$PPC)##
## Shapiro-Wilk normality test
##
## data: r$PPC
## W = 0.82057, p-value = 0.0001018
# Set the plotting layout to a single panel per page.
par(mfrow = c(1, 1))
# Shapiro-Wilk normality test on the base-10 log of GradoEscolaridad. The
# p-value printed below (0.05744) is just above 0.05, so normality is not
# rejected at the 5% level.
shapiro.test(log10(r$GradoEscolaridad))##
## Shapiro-Wilk normality test
##
## data: log10(r$GradoEscolaridad)
## W = 0.93593, p-value = 0.05744
Regresión lineal
# Scatter plot of GradoEscolaridad against PPC with a fitted least-squares
# line (method = "lm") and no confidence band (se = FALSE).
# Columns are referenced by bare name inside aes() because `data = r` is
# already supplied; ggplot2 discourages `r$col` inside aes().
ggplot(data = r, mapping = aes(x = PPC, y = GradoEscolaridad)) +
  geom_point(color = "firebrick", size = 2) +
  labs(title = "GE ~ PPC", x = "PIB Per Cápita", y = "Grado Escolaridad") +
  geom_smooth(method = "lm", se = FALSE, color = "black") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
## `geom_smooth()` using formula 'y ~ x'