# Scrape the global happiness index table from the Spanish Wikipedia article.
library(htmltab)
link <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
# XPath of the table body to extract from the page.
path <- "//*/div/table/tbody"
felicidad <- htmltab(doc = link, which = path)
# Inspect the structure of the scraped data frame.
str(felicidad)
## 'data.frame': 156 obs. of 9 variables:
## $ № : chr "1" "2" "3" "4" ...
## $ País : chr "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
## $ Puntuación : chr "7.633" "7.560" "7.555" "7.495" ...
## $ PIB per cápita : chr "1.305" "1.372" "1.351" "1.343" ...
## $ Apoyo social : chr "1.592" "1.595" "1.590" "1.644" ...
## $ Esperanza de años de vida saludable : chr "0.874" "0.870" "0.868" "0.914" ...
## $ Libertad para tomar decisiones vitales: chr "0.681" "0.685" "0.683" "0.677" ...
## $ Generosidad : chr "0.192" "0.285" "0.284" "0.353" ...
## $ Percepción de la corrupción : chr "0.393" "0.410" "0.408" "0.138" ...
# List the column headers of the scraped table.
names(felicidad)
## [1] "№"
## [2] "País"
## [3] "Puntuación"
## [4] "PIB per cápita"
## [5] "Apoyo social"
## [6] "Esperanza de años de vida saludable"
## [7] "Libertad para tomar decisiones vitales"
## [8] "Generosidad"
## [9] "Percepción de la corrupción"
# Replace the scraped headers with short ASCII names, drop the rank column,
# and convert every remaining column except the country label to numeric.
newN <- c(
  "n", "pais", "puntuacion", "pbi", "apoyosoc", "esperanza", "libertad",
  "generosidad", "percepcioncorrupcion"
)
# The rename is positional: fail loudly if the scraped table no longer has
# the expected number of columns instead of silently mislabelling them.
stopifnot(ncol(felicidad) == length(newN))
names(felicidad) <- newN
felicidad$n <- NULL
# Pick the columns to convert by name (all but the country label) rather
# than by hard-coded position.
numCols <- setdiff(names(felicidad), "pais")
felicidad[numCols] <- lapply(felicidad[numCols], as.numeric)
str(felicidad)
## 'data.frame': 156 obs. of 8 variables:
## $ pais : chr "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
## $ puntuacion : num 7.63 7.56 7.55 7.5 7.49 ...
## $ pbi : num 1.3 1.37 1.35 1.34 1.42 ...
## $ apoyosoc : num 1.59 1.59 1.59 1.64 1.55 ...
## $ esperanza : num 0.874 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 0.91 ...
## $ libertad : num 0.681 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 0.647 ...
## $ generosidad : num 0.192 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 0.361 ...
## $ percepcioncorrupcion: num 0.393 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 0.302 ...
# Print per-column summary statistics of the cleaned data frame.
summary(felicidad)
## pais puntuacion pbi apoyosoc
## Length:156 Min. :2.905 Min. :0.0000 Min. :0.000
## Class :character 1st Qu.:4.454 1st Qu.:0.6162 1st Qu.:1.067
## Mode :character Median :5.378 Median :0.9495 Median :1.255
## Mean :5.376 Mean :0.8941 Mean :1.214
## 3rd Qu.:6.168 3rd Qu.:1.2025 3rd Qu.:1.466
## Max. :7.633 Max. :2.0960 Max. :1.644
## esperanza libertad generosidad percepcioncorrupcion
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.4223 1st Qu.:0.3560 1st Qu.:0.1108 1st Qu.:0.05175
## Median :0.6510 Median :0.4870 Median :0.1750 Median :0.08200
## Mean :0.5989 Mean :0.4555 Mean :0.1821 Mean :0.11306
## 3rd Qu.:0.7820 3rd Qu.:0.5800 3rd Qu.:0.2422 3rd Qu.:0.13650
## Max. :1.0300 Max. :0.7240 Max. :0.5980 Max. :0.45700
# Shapiro-Wilk normality test on the pbi column.
shapiro.test(felicidad$pbi)
##
## Shapiro-Wilk normality test
##
## data: felicidad$pbi
## W = 0.97748, p-value = 0.01176
La variable pbi no sigue una distribución normal (p = 0.01176 < 0.05).
# Spearman rank correlation test between pbi and generosidad.
cor.test(felicidad$pbi,felicidad$generosidad,method = "spearman")
## Warning in cor.test.default(felicidad$pbi, felicidad$generosidad, method =
## "spearman"): Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: felicidad$pbi and felicidad$generosidad
## S = 628978, p-value = 0.9417
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.005898693
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Scatter plot of generosidad against pbi with a fitted regression line, its
# confidence band, and the Spearman coefficient annotated on the plot.
ggscatter(
  felicidad,
  x = "pbi",
  y = "generosidad",
  cor.coef = TRUE,
  cor.method = "spearman",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE
)
El p-valor es 0.94 (> 0.05), por lo que no hay evidencia de correlación entre pbi y generosidad.
# Shapiro-Wilk normality test on the percepcioncorrupcion column.
shapiro.test(felicidad$percepcioncorrupcion)
##
## Shapiro-Wilk normality test
##
## data: felicidad$percepcioncorrupcion
## W = 0.81431, p-value = 8.486e-13
La variable percepcioncorrupcion no sigue una distribución normal (p = 8.486e-13 < 0.05).
# List the available columns before writing the model formula.
names(felicidad)
## [1] "pais" "puntuacion" "pbi"
## [4] "apoyosoc" "esperanza" "libertad"
## [7] "generosidad" "percepcioncorrupcion"
# Linear model with percepcioncorrupcion as the response and every other
# numeric indicator as a predictor.
todo1 <- lm(
  percepcioncorrupcion ~ puntuacion + pbi + apoyosoc + esperanza + libertad +
    generosidad,
  data = felicidad
)
summary(todo1)
##
## Call:
## lm(formula = percepcioncorrupcion ~ puntuacion + pbi + apoyosoc +
## esperanza + libertad + generosidad, data = felicidad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17247 -0.05901 -0.01474 0.04397 0.33498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.08207 0.03828 -2.144 0.03364 *
## puntuacion 0.02049 0.01267 1.617 0.10801
## pbi 0.01869 0.03542 0.528 0.59848
## apoyosoc -0.05800 0.03362 -1.725 0.08655 .
## esperanza 0.03145 0.05250 0.599 0.54998
## libertad 0.16812 0.05078 3.311 0.00117 **
## generosidad 0.23753 0.07113 3.339 0.00106 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08168 on 149 degrees of freedom
## Multiple R-squared: 0.3217, Adjusted R-squared: 0.2944
## F-statistic: 11.78 on 6 and 149 DF, p-value: 8.687e-11
# Shapiro-Wilk normality test on the esperanza column.
shapiro.test(felicidad$esperanza)
##
## Shapiro-Wilk normality test
##
## data: felicidad$esperanza
## W = 0.95408, p-value = 5.151e-05
La variable esperanza no sigue una distribución normal (p = 5.151e-05 < 0.05).
# Linear model with esperanza as the response and every other numeric
# indicator as a predictor.
todo2 <- lm(
  esperanza ~ puntuacion + pbi + apoyosoc + percepcioncorrupcion + libertad +
    generosidad,
  data = felicidad
)
summary(todo2)
##
## Call:
## lm(formula = esperanza ~ puntuacion + pbi + apoyosoc + percepcioncorrupcion +
## libertad + generosidad, data = felicidad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45271 -0.05889 0.00928 0.07845 0.25994
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.09737 0.06004 -1.622 0.1070
## puntuacion 0.04902 0.01952 2.512 0.0131 *
## pbi 0.37513 0.04592 8.168 1.23e-13 ***
## apoyosoc 0.08534 0.05246 1.627 0.1059
## percepcioncorrupcion 0.07642 0.12754 0.599 0.5500
## libertad -0.02142 0.08199 -0.261 0.7943
## generosidad -0.02816 0.11493 -0.245 0.8068
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1273 on 149 degrees of freedom
## Multiple R-squared: 0.7479, Adjusted R-squared: 0.7378
## F-statistic: 73.68 on 6 and 149 DF, p-value: < 2.2e-16
# Load the provincial IDE data set from the published Google Sheets CSV
# export, keeping text columns as character vectors.
csv <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTB7vQ7svenf3vyl0fPsh3r-m9FrM7abeJN1rcXok8PdEdtZx7zwMJNKsPMd7wqAA/pub?output=csv"
Ide <- read.csv(csv, stringsAsFactors = FALSE)
# Inspect the structure of the loaded data frame.
str(Ide)
## 'data.frame': 195 obs. of 13 variables:
## $ regionUbigeo : int 10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
## $ provinciaUbigeo: int 10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
## $ PROVINCIA : chr "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
## $ IDE : num 0.774 0.662 0.632 0.46 0.605 ...
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : chr "NO" "NO" "NO" "NO" ...
## $ capital : chr "SI" "NO" "NO" "NO" ...
## $ tamano : chr "Pequena" "Pequena" "Muy pequena" "Pequena" ...
# Recode the three categorical columns as factors, selecting them by name so
# the step does not depend on their position in the published sheet.
catCols <- c("costa", "capital", "tamano")
Ide[catCols] <- lapply(Ide[catCols], as.factor)
# Keep only the rows that have no missing value in any column.
Ide <- Ide[complete.cases(Ide), ]
summary(Ide)
## regionUbigeo provinciaUbigeo PROVINCIA IDE
## Min. : 10000 Min. : 10100 Length:195 Min. :0.4245
## 1st Qu.: 50000 1st Qu.: 50750 Class :character 1st Qu.:0.5806
## Median :100000 Median :101100 Mode :character Median :0.6367
## Mean :113795 Mean :114358 Mean :0.6519
## 3rd Qu.:170000 3rd Qu.:170250 3rd Qu.:0.7164
## Max. :250000 Max. :250400 Max. :0.9104
## identidad salud educacion saneamiento
## Min. :81.97 Min. : 2.598 Min. :44.03 Min. : 0.2351
## 1st Qu.:96.54 1st Qu.: 6.485 1st Qu.:74.06 1st Qu.:40.9610
## Median :97.90 Median :10.380 Median :82.49 Median :54.2229
## Mean :97.30 Mean :11.919 Mean :80.42 Mean :56.2641
## 3rd Qu.:98.87 3rd Qu.:14.744 3rd Qu.:89.02 3rd Qu.:72.4629
## Max. :99.50 Max. :44.741 Max. :99.50 Max. :99.5000
## electrificacion poblacion costa capital tamano
## Min. :33.83 Min. : 4251 NO:163 NO:170 Grande :15
## 1st Qu.:61.31 1st Qu.: 32188 SI: 32 SI: 25 Mediana :46
## Median :74.29 Median : 63039 Muy grande : 1
## Mean :72.11 Mean : 154543 Muy pequena:69
## 3rd Qu.:85.02 3rd Qu.: 121804 Pequena :64
## Max. :99.50 Max. :8481415
# Show the category labels of each recoded factor.
levels(Ide[["costa"]])
## [1] "NO" "SI"
levels(Ide[["capital"]])
## [1] "NO" "SI"
levels(Ide[["tamano"]])
## [1] "Grande" "Mediana" "Muy grande" "Muy pequena" "Pequena"
# Shapiro-Wilk normality test on the IDE column.
shapiro.test(Ide$IDE)
##
## Shapiro-Wilk normality test
##
## data: Ide$IDE
## W = 0.98349, p-value = 0.02167
La variable IDE no sigue una distribución normal (p = 0.02167 < 0.05).
library(DescTools)
# Skewness coefficient of the IDE column.
Skew(Ide$IDE)
## [1] 0.3486982
Asimetría positiva (0.35): la cola de la distribución se extiende hacia la derecha.
# Shapiro-Wilk normality test on the saneamiento column.
shapiro.test(Ide$saneamiento)
##
## Shapiro-Wilk normality test
##
## data: Ide$saneamiento
## W = 0.98626, p-value = 0.05485
No se rechaza la normalidad (p = 0.05485 > 0.05), por lo que se usarían pruebas paramétricas, como la prueba t, para continuar con el análisis de la relación.
# Shapiro-Wilk normality test on the identidad column.
shapiro.test(Ide$identidad)
##
## Shapiro-Wilk normality test
##
## data: Ide$identidad
## W = 0.72664, p-value < 2.2e-16
La variable identidad no sigue una distribución normal (p < 2.2e-16).
# List the available columns before writing the model formulas.
names(Ide)
## [1] "regionUbigeo" "provinciaUbigeo" "PROVINCIA"
## [4] "IDE" "identidad" "salud"
## [7] "educacion" "saneamiento" "electrificacion"
## [10] "poblacion" "costa" "capital"
## [13] "tamano"
# Linear model with identidad as the response, regressed on the other service
# indicators, population, and the three categorical factors.
identidadDep <- lm(
  identidad ~ salud + educacion + saneamiento + electrificacion + poblacion +
    costa + capital + tamano,
  data = Ide
)
summary(identidadDep)
##
## Call:
## lm(formula = identidad ~ salud + educacion + saneamiento + electrificacion +
## poblacion + costa + capital + tamano, data = Ide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1774 -0.8996 0.2286 1.1466 4.0155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.530e+01 1.642e+00 51.935 < 2e-16 ***
## salud -3.668e-03 2.897e-02 -0.127 0.89938
## educacion 1.190e-01 1.689e-02 7.045 3.64e-11 ***
## saneamiento 2.971e-02 1.067e-02 2.785 0.00592 **
## electrificacion -8.407e-03 1.470e-02 -0.572 0.56819
## poblacion 9.183e-07 1.937e-06 0.474 0.63607
## costaSI 1.979e-01 4.812e-01 0.411 0.68139
## capitalSI 2.562e-01 6.391e-01 0.401 0.68898
## tamanoMediana 1.178e+00 9.838e-01 1.198 0.23259
## tamanoMuy grande -6.758e+00 1.543e+01 -0.438 0.66192
## tamanoMuy pequena 1.767e+00 1.185e+00 1.491 0.13777
## tamanoPequena 1.030e+00 1.120e+00 0.920 0.35884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.905 on 183 degrees of freedom
## Multiple R-squared: 0.4343, Adjusted R-squared: 0.4003
## F-statistic: 12.77 on 11 and 183 DF, p-value: < 2.2e-16
Educación tiene más efecto que saneamiento sobre identidad (coeficiente 0.119 frente a 0.030).
# Shapiro-Wilk normality test on the salud column.
shapiro.test(Ide$salud)
##
## Shapiro-Wilk normality test
##
## data: Ide$salud
## W = 0.86893, p-value = 5.948e-12
La variable salud no sigue una distribución normal (p = 5.948e-12 < 0.05).
# Linear model with salud as the response, regressed on the other service
# indicators, population, and the three categorical factors.
saludDep <- lm(
  salud ~ identidad + educacion + saneamiento + electrificacion + poblacion +
    costa + capital + tamano,
  data = Ide
)
summary(saludDep)
##
## Call:
## lm(formula = salud ~ identidad + educacion + saneamiento + electrificacion +
## poblacion + costa + capital + tamano, data = Ide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3402 -3.2121 -0.2517 2.6801 19.4999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.176e+00 1.662e+01 0.251 0.8019
## identidad -2.388e-02 1.886e-01 -0.127 0.8994
## educacion 1.140e-02 4.858e-02 0.235 0.8147
## saneamiento 1.351e-01 2.594e-02 5.209 5.07e-07 ***
## electrificacion 2.556e-02 3.751e-02 0.681 0.4965
## poblacion 1.399e-06 4.946e-06 0.283 0.7776
## costaSI 2.171e+00 1.218e+00 1.783 0.0763 .
## capitalSI 7.036e+00 1.546e+00 4.551 9.73e-06 ***
## tamanoMediana -3.462e+00 2.507e+00 -1.381 0.1690
## tamanoMuy grande -1.258e+00 3.940e+01 -0.032 0.9746
## tamanoMuy pequena -3.255e-01 3.043e+00 -0.107 0.9149
## tamanoPequena -2.534e+00 2.859e+00 -0.887 0.3765
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.86 on 183 degrees of freedom
## Multiple R-squared: 0.5837, Adjusted R-squared: 0.5587
## F-statistic: 23.33 on 11 and 183 DF, p-value: < 2.2e-16
Las provincias capital tendrían un mejor nivel de salud (coeficiente de capitalSI = 7.04, p < 0.001).