PRÁCTICA FINAL
Análisis del Índice global de felicidad
library(htmltab)
# Address of the Wikipedia article that hosts the table to scrape.
LINKWIKIP <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
# XPath expression pointing at the table inside the article content.
TablaF <- '//*[@id="mw-content-text"]/div/table'
# Download the page, parse the table into a data frame, and list its columns.
Felicidad <- htmltab(doc = LINKWIKIP, which = TablaF)
names(Felicidad)
## [1] "№"
## [2] "País"
## [3] "Puntuación"
## [4] "PIB per cápita"
## [5] "Apoyo social"
## [6] "Esperanza de años de vida saludable"
## [7] "Libertad para tomar decisiones vitales"
## [8] "Generosidad"
## [9] "Percepción de la corrupción"
# Inspect the column types of the scraped table before any conversion.
str(Felicidad)
## 'data.frame': 156 obs. of 9 variables:
## $ № : chr "1" "2" "3" "4" ...
## $ País : chr "Finlandia" "Colombia" "Noruega" "Dinamarca" ...
## $ Puntuación : chr "7.633" "7.594" "7.560" "7.555" ...
## $ PIB per cápita : chr "1.305" "1.456" "1.372" "1.351" ...
## $ Apoyo social : chr "1.592" "1.582" "1.595" "1.590" ...
## $ Esperanza de años de vida saludable : chr "0.874" "0.873" "0.870" "0.868" ...
## $ Libertad para tomar decisiones vitales: chr "0.681" "0.686" "0.685" "0.683" ...
## $ Generosidad : chr "0.192" "0.286" "0.285" "0.284" ...
## $ Percepción de la corrupción : chr "0.393" "0.130" "0.410" "0.408" ...
# Convert the seven indicator columns (positions 3 to 9) from character
# to numeric.
Felicidad[, 3:9] <- lapply(Felicidad[, 3:9], as.numeric)
# Drop the row-number column.
Felicidad$`№` <- NULL
library(stringr)
# Shorten every column name to its first word
# (e.g. "PIB per cápita" becomes "PIB").
names(Felicidad) <- str_split(names(Felicidad), " ", simplify = TRUE)[, 1]
# Remove non-ASCII characters from the names. Accented letters are deleted,
# not transliterated, so "País" becomes "Pas" and "Percepción" becomes
# "Percepcin"; the model formulas further down rely on these exact names.
names(Felicidad) <- str_replace_all(names(Felicidad), "[^[:ascii:]]", "")
# Confirm the resulting names and types.
str(Felicidad)
## 'data.frame': 156 obs. of 8 variables:
## $ Pas : chr "Finlandia" "Colombia" "Noruega" "Dinamarca" ...
## $ Puntuacin : num 7.63 7.59 7.56 7.55 7.5 ...
## $ PIB : num 1.3 1.46 1.37 1.35 1.34 ...
## $ Apoyo : num 1.59 1.58 1.59 1.59 1.64 ...
## $ Esperanza : num 0.874 0.873 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 ...
## $ Libertad : num 0.681 0.686 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 ...
## $ Generosidad: num 0.192 0.286 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 ...
## $ Percepcin : num 0.393 0.13 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 ...
# Descriptive statistics for every column of the cleaned table.
summary(Felicidad)
## Pas Puntuacin PIB Apoyo
## Length:156 Min. :2.905 Min. :0.0000 Min. :0.000
## Class :character 1st Qu.:4.454 1st Qu.:0.6162 1st Qu.:1.067
## Mode :character Median :5.378 Median :0.9495 Median :1.255
## Mean :5.384 Mean :0.8941 Mean :1.214
## 3rd Qu.:6.168 3rd Qu.:1.2025 3rd Qu.:1.466
## Max. :7.633 Max. :2.0960 Max. :1.644
## Esperanza Libertad Generosidad Percepcin
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.4223 1st Qu.:0.3560 1st Qu.:0.1108 1st Qu.:0.05175
## Median :0.6510 Median :0.4870 Median :0.1750 Median :0.08200
## Mean :0.5989 Mean :0.4555 Mean :0.1821 Mean :0.11306
## 3rd Qu.:0.7820 3rd Qu.:0.5800 3rd Qu.:0.2422 3rd Qu.:0.13650
## Max. :1.0300 Max. :0.7240 Max. :0.5980 Max. :0.45700
# Linear model with Esperanza (healthy life expectancy) as the response and
# the remaining five indicators as predictors.
Regresion <- lm(
  Esperanza ~ PIB + Apoyo + Libertad + Generosidad + Percepcin,
  data = Felicidad
)
summary(Regresion)
##
## Call:
## lm(formula = Esperanza ~ PIB + Apoyo + Libertad + Generosidad +
## Percepcin, data = Felicidad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44391 -0.06484 0.01431 0.07939 0.24825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.003605 0.047854 -0.075 0.94004
## PIB 0.445871 0.036908 12.081 < 2e-16 ***
## Apoyo 0.139466 0.048666 2.866 0.00476 **
## Libertad 0.046434 0.078767 0.590 0.55641
## Generosidad -0.002610 0.116482 -0.022 0.98216
## Percepcin 0.122647 0.128416 0.955 0.34108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1295 on 150 degrees of freedom
## Multiple R-squared: 0.7373, Adjusted R-squared: 0.7285
## F-statistic: 84.18 on 5 and 150 DF, p-value: < 2.2e-16
Solo son significativas y directas las variables PIB (coeficiente 0.446, significativo al 0.001) y Apoyo (coeficiente 0.139, significativo al 0.01).
# Linear model with Percepcin (perception of corruption) as the response and
# the remaining five indicators as predictors.
Regresion2 <- lm(
  Percepcin ~ PIB + Apoyo + Libertad + Generosidad + Esperanza,
  data = Felicidad
)
summary(Regresion2)
##
## Call:
## lm(formula = Percepcin ~ PIB + Apoyo + Libertad + Generosidad +
## Esperanza, data = Felicidad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18413 -0.05413 -0.01276 0.04097 0.31134
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04356 0.03013 -1.446 0.150266
## PIB 0.04141 0.03269 1.267 0.207190
## Apoyo -0.03841 0.03153 -1.218 0.225052
## Libertad 0.19913 0.04727 4.213 4.34e-05 ***
## Generosidad 0.25261 0.07090 3.563 0.000492 ***
## Esperanza 0.04928 0.05160 0.955 0.341075
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08212 on 150 degrees of freedom
## Multiple R-squared: 0.3098, Adjusted R-squared: 0.2868
## F-statistic: 13.47 on 5 and 150 DF, p-value: 7.529e-11
library(dlookr)
## Loading required package: mice
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:base':
##
## transform
# Normality test for each numeric column (positions 2 to 8); the result
# reports one statistic and p-value per variable.
normality(Felicidad[, 2:8])
## Warning: `cols` is now required.
## Please use `cols = c(statistic)`
## # A tibble: 7 x 4
## vars statistic p_value sample
## <chr> <dbl> <dbl> <dbl>
## 1 Puntuacin 0.984 6.48e- 2 156
## 2 PIB 0.977 1.18e- 2 156
## 3 Apoyo 0.917 8.71e- 8 156
## 4 Esperanza 0.954 5.15e- 5 156
## 5 Libertad 0.946 1.13e- 5 156
## 6 Generosidad 0.960 1.79e- 4 156
## 7 Percepcin 0.814 8.49e-13 156
Generosidad y PIB
# Spearman rank correlation between Generosidad and PIB.
# The one-sided formula names the two variables to correlate.
R <- ~ Generosidad + PIB
# The method name and the logical are written out in full instead of relying
# on partial matching ("spearm") and the reassignable shorthand `F`.
# exact = FALSE turns off the exact p-value computation.
cor.test(R, data = Felicidad, method = "spearman", exact = FALSE)
##
## Spearman's rank correlation rho
##
## data: Generosidad and PIB
## S = 628978, p-value = 0.9417
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.005898693
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Scatter plot of PIB against Generosidad with a fitted regression line, its
# confidence band, and the Spearman correlation coefficient annotated.
ggscatter(
  Felicidad,
  x = "Generosidad",
  y = "PIB",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
# URL of the Google Sheets document published as CSV.
IDE <- 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTChEH9LIjZeT4v3H-X_m2u66UqR0vG4AB0oWRYVzq1TpGSFrY7uCCZYwb7mCWhl79kfSYiKBQYp3-L/pub?gid=1780081736&single=true&output=csv'
# Read the sheet keeping text columns as character; the logical is spelled
# out as FALSE because the shorthand `F` can be reassigned.
Tabla <- read.csv(IDE, stringsAsFactors = FALSE)
# List the imported column names.
names(Tabla)
## [1] "regionUbigeo" "provinciaUbigeo" "PROVINCIA"
## [4] "IDE" "identidad" "salud"
## [7] "educacion" "saneamiento" "electrificacion"
## [10] "poblacion" "costa" "capital"
## [13] "tamano"
# Inspect the column types of the imported table before recoding.
str(Tabla)
## 'data.frame': 195 obs. of 13 variables:
## $ regionUbigeo : int 10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
## $ provinciaUbigeo: int 10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
## $ PROVINCIA : chr "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
## $ IDE : num 0.774 0.662 0.632 0.46 0.605 ...
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : chr "NO" "NO" "NO" "NO" ...
## $ capital : chr "SI" "NO" "NO" "NO" ...
## $ tamano : chr "Pequena" "Pequena" "Muy pequena" "Pequena" ...
# Recode the three categorical columns as factors. They are selected by name
# rather than by position (11 to 13) so the step still targets the right
# columns if the published sheet gains or reorders columns.
Tabla[c("costa", "capital", "tamano")] <-
  lapply(Tabla[c("costa", "capital", "tamano")], as.factor)
# Confirm the new types.
str(Tabla)
## 'data.frame': 195 obs. of 13 variables:
## $ regionUbigeo : int 10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
## $ provinciaUbigeo: int 10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
## $ PROVINCIA : chr "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
## $ IDE : num 0.774 0.662 0.632 0.46 0.605 ...
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : Factor w/ 2 levels "NO","SI": 1 1 1 1 1 1 1 1 1 1 ...
## $ capital : Factor w/ 2 levels "NO","SI": 2 1 1 1 1 1 1 2 1 1 ...
## $ tamano : Factor w/ 5 levels "Grande","Mediana",..: 5 5 4 5 5 4 2 2 4 4 ...
# Formula reused by the grouped summaries: saneamiento split by costa.
P <- saneamiento ~ costa
# Mean of saneamiento for each level of costa.
aggregate(P, data = Tabla, FUN = mean)
## costa saneamiento
## 1 NO 50.97556
## 2 SI 83.20243
# Run a Shapiro-Wilk normality test on a numeric vector.
#
# x: numeric vector passed straight to shapiro.test().
# Returns a length-2 numeric vector: the test statistic (which keeps its
# name "W") followed by the p-value.
normalidadTest <- function(x) {
  sw <- shapiro.test(x)
  c(sw[["statistic"]], sw[["p.value"]])
}
# Apply the Shapiro-Wilk helper to saneamiento within each level of costa.
resultado <- aggregate(P, data = Tabla, FUN = normalidadTest)
library(knitr)
# The second column of the result holds the helper's two values per group;
# expand it into its own data frame and give the columns descriptive names.
shapiroTest <- as.data.frame(resultado[, 2])
names(shapiroTest) <- c("SW_Statistic", "Probabilidad")
# Render the group labels next to the test results as a table.
kable(cbind(resultado[1], shapiroTest))
| costa | SW_Statistic | Probabilidad |
|---|---|---|
| NO | 0.9949525 | 0.8529651 |
| SI | 0.9580179 | 0.2422331 |
# Linear model with identidad as the response and the other service
# indicators plus poblacion as predictors.
Regresion3 <- lm(
  identidad ~ salud + educacion + saneamiento + electrificacion + poblacion,
  data = Tabla
)
summary(Regresion3)
##
## Call:
## lm(formula = identidad ~ salud + educacion + saneamiento + electrificacion +
## poblacion, data = Tabla)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3984 -0.8723 0.1998 1.1178 3.7027
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.657e+01 1.002e+00 86.397 < 2e-16 ***
## salud -1.341e-03 2.558e-02 -0.052 0.9582
## educacion 1.251e-01 1.662e-02 7.524 2.1e-12 ***
## saneamiento 2.852e-02 9.816e-03 2.906 0.0041 **
## electrificacion -1.259e-02 1.437e-02 -0.876 0.3819
## poblacion -3.822e-08 2.386e-07 -0.160 0.8729
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.907 on 189 degrees of freedom
## Multiple R-squared: 0.4143, Adjusted R-squared: 0.3988
## F-statistic: 26.74 on 5 and 189 DF, p-value: < 2.2e-16
# Linear model with salud as the response; the predictors are the numeric
# indicators plus the factors capital and costa.
Rgresion4 <- lm(
  salud ~ educacion + saneamiento + electrificacion + identidad +
    poblacion + capital + costa,
  data = Tabla
)
summary(Rgresion4)
##
## Call:
## lm(formula = salud ~ educacion + saneamiento + electrificacion +
## identidad + poblacion + capital + costa, data = Tabla)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3437 -3.1715 -0.5768 2.6857 20.8256
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.895e+00 1.675e+01 -0.173 0.8629
## educacion 2.766e-02 4.964e-02 0.557 0.5780
## saneamiento 1.286e-01 2.642e-02 4.869 2.38e-06 ***
## electrificacion 1.416e-02 3.838e-02 0.369 0.7126
## identidad 2.877e-02 1.908e-01 0.151 0.8803
## poblacion 1.469e-06 6.304e-07 2.329 0.0209 *
## capitalSI 7.499e+00 1.268e+00 5.916 1.54e-08 ***
## costaSI 2.090e+00 1.222e+00 1.710 0.0889 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.996 on 187 degrees of freedom
## Multiple R-squared: 0.5504, Adjusted R-squared: 0.5336
## F-statistic: 32.7 on 7 and 187 DF, p-value: < 2.2e-16
EL SANEAMIENTO TIENE RELACIÓN DIRECTA CON LA SALUD Y SU COEFICIENTE ES SIGNIFICATIVO AL 0.001.