https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad
# Scrape the happiness-index table from the Spanish Wikipedia article.
link <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
# XPath of the table body handed to htmltab.
path <- "//*/div/table/tbody"
library(htmltab)
# Declare the encoding explicitly: without it the UTF-8 page can be decoded
# with the platform's native encoding, which garbles accented headers and
# accented country names in the scraped values.
indice <- htmltab(doc = link, which = path, encoding = "UTF-8")
str(indice)
## 'data.frame': 156 obs. of 9 variables:
## $ â„– : chr "1" "2" "3" "4" ...
## $ PaÃs : chr "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
## $ Puntuación : chr "7.633" "7.560" "7.555" "7.495" ...
## $ PIB per cápita : chr "1.305" "1.372" "1.351" "1.343" ...
## $ Apoyo social : chr "1.592" "1.595" "1.590" "1.644" ...
## $ Esperanza de años de vida saludable : chr "0.874" "0.870" "0.868" "0.914" ...
## $ Libertad para tomar decisiones vitales: chr "0.681" "0.685" "0.683" "0.677" ...
## $ Generosidad : chr "0.192" "0.285" "0.284" "0.353" ...
## $ Percepción de la corrupción : chr "0.393" "0.410" "0.408" "0.138" ...
names(indice)
## [1] "â„–"
## [2] "PaÃs"
## [3] "Puntuación"
## [4] "PIB per cápita"
## [5] "Apoyo social"
## [6] "Esperanza de años de vida saludable"
## [7] "Libertad para tomar decisiones vitales"
## [8] "Generosidad"
## [9] "Percepción de la corrupción"
# Short ASCII names, listed in the same order as the scraped columns.
newN <- c(
  "n", "pais", "puntuacion", "pbi", "apoyosoc", "esperanza",
  "libertad", "generosidad", "percepcioncorrupcion"
)
names(indice) <- newN
# Drop the numbering column.
indice[["n"]] <- NULL
# After the drop, column 1 is the country name; columns 2 to 8 were read as
# text and are converted to numeric.
indice[2:8] <- lapply(indice[2:8], as.numeric)
str(indice)
## 'data.frame': 156 obs. of 8 variables:
## $ pais : chr "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
## $ puntuacion : num 7.63 7.56 7.55 7.5 7.49 ...
## $ pbi : num 1.3 1.37 1.35 1.34 1.42 ...
## $ apoyosoc : num 1.59 1.59 1.59 1.64 1.55 ...
## $ esperanza : num 0.874 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 0.91 ...
## $ libertad : num 0.681 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 0.647 ...
## $ generosidad : num 0.192 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 0.361 ...
## $ percepcioncorrupcion: num 0.393 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 0.302 ...
summary(indice)
## pais puntuacion pbi apoyosoc
## Length:156 Min. :2.905 Min. :0.0000 Min. :0.000
## Class :character 1st Qu.:4.454 1st Qu.:0.6162 1st Qu.:1.067
## Mode :character Median :5.378 Median :0.9495 Median :1.255
## Mean :5.376 Mean :0.8941 Mean :1.214
## 3rd Qu.:6.168 3rd Qu.:1.2025 3rd Qu.:1.466
## Max. :7.633 Max. :2.0960 Max. :1.644
## esperanza libertad generosidad percepcioncorrupcion
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.4223 1st Qu.:0.3560 1st Qu.:0.1108 1st Qu.:0.05175
## Median :0.6510 Median :0.4870 Median :0.1750 Median :0.08200
## Mean :0.5989 Mean :0.4555 Mean :0.1821 Mean :0.11306
## 3rd Qu.:0.7820 3rd Qu.:0.5800 3rd Qu.:0.2422 3rd Qu.:0.13650
## Max. :1.0300 Max. :0.7240 Max. :0.5980 Max. :0.45700
shapiro.test(indice$pbi)
##
## Shapiro-Wilk normality test
##
## data: indice$pbi
## W = 0.97748, p-value = 0.01176
pbi NO es normal (p = 0.012 < 0.05).
# Linear model of pbi on the other six numeric indicators.
all <- lm(
  pbi ~ puntuacion + esperanza + apoyosoc + libertad + generosidad +
    percepcioncorrupcion,
  data = indice
)
summary(all)
##
## Call:
## lm(formula = pbi ~ puntuacion + esperanza + apoyosoc + libertad +
## generosidad + percepcioncorrupcion, data = indice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40453 -0.09228 -0.01632 0.08688 0.94715
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.31959 0.08590 -3.720 0.000281 ***
## puntuacion 0.13963 0.02724 5.126 9.04e-07 ***
## esperanza 0.82451 0.10094 8.168 1.23e-13 ***
## apoyosoc 0.08478 0.07815 1.085 0.279731
## libertad -0.22551 0.12017 -1.877 0.062526 .
## generosidad -0.23204 0.16935 -1.370 0.172693
## percepcioncorrupcion 0.09981 0.18913 0.528 0.598485
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1887 on 149 degrees of freedom
## Multiple R-squared: 0.7792, Adjusted R-squared: 0.7703
## F-statistic: 87.61 on 6 and 149 DF, p-value: < 2.2e-16
# Correlation test between esperanza and percepcioncorrupcion.
h1 <- ~ esperanza + percepcioncorrupcion
# `exact` only matters for the rank-based methods; it is kept so the call
# still works if the method is switched, and written as FALSE because `F` is
# an ordinary variable that can be reassigned.
# NOTE(review): the Shapiro-Wilk tests further down in this file reject
# normality for both variables -- consider method = "spearman"; confirm with
# the author before changing the reported statistic.
cor.test(h1, data = indice, method = "pearson", exact = FALSE)
##
## Pearson's product-moment correlation
##
## data: esperanza and percepcioncorrupcion
## t = 4.0953, df = 154, p-value = 6.791e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1643339 0.4484393
## sample estimates:
## cor
## 0.313382
La correlacion es positiva y significativa, pero baja: r = 0.31 (coeficiente de Pearson, no rho de Spearman; p < 0.001).
En grafico:
library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
# Scatter plot with regression line, confidence band and Pearson coefficient.
ggscatter(
  indice,
  x = "esperanza",
  y = "percepcioncorrupcion",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)
shapiro.test(indice$pbi)
##
## Shapiro-Wilk normality test
##
## data: indice$pbi
## W = 0.97748, p-value = 0.01176
cor.test(indice$pbi,indice$generosidad,method = "pearson")
##
## Pearson's product-moment correlation
##
## data: indice$pbi and indice$generosidad
## t = -0.060844, df = 154, p-value = 0.9516
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1619189 0.1523552
## sample estimates:
## cor
## -0.004902909
library(ggpubr)
# Scatter plot of pbi against generosidad with regression line, confidence
# band and Pearson coefficient.
ggscatter(
  indice,
  x = "pbi",
  y = "generosidad",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)
NO hay correlacion entre pbi y generosidad (r = -0.005, p = 0.95).
shapiro.test(indice$esperanza)
##
## Shapiro-Wilk normality test
##
## data: indice$esperanza
## W = 0.95408, p-value = 5.151e-05
es NO NORMAL
# Linear model of esperanza on the other six numeric indicators.
espe <- lm(
  esperanza ~ puntuacion + pbi + apoyosoc + libertad + generosidad +
    percepcioncorrupcion,
  data = indice
)
summary(espe)
##
## Call:
## lm(formula = esperanza ~ puntuacion + pbi + apoyosoc + libertad +
## generosidad + percepcioncorrupcion, data = indice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45271 -0.05889 0.00928 0.07845 0.25994
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.09737 0.06004 -1.622 0.1070
## puntuacion 0.04902 0.01952 2.512 0.0131 *
## pbi 0.37513 0.04592 8.168 1.23e-13 ***
## apoyosoc 0.08534 0.05246 1.627 0.1059
## libertad -0.02142 0.08199 -0.261 0.7943
## generosidad -0.02816 0.11493 -0.245 0.8068
## percepcioncorrupcion 0.07642 0.12754 0.599 0.5500
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1273 on 149 degrees of freedom
## Multiple R-squared: 0.7479, Adjusted R-squared: 0.7378
## F-statistic: 73.68 on 6 and 149 DF, p-value: < 2.2e-16
Dos variables tienen efecto significativo: puntuacion y pbi.
shapiro.test(indice$percepcioncorrupcion)
##
## Shapiro-Wilk normality test
##
## data: indice$percepcioncorrupcion
## W = 0.81431, p-value = 8.486e-13
es NO NORMAL
# Linear model of percepcioncorrupcion on the other six numeric indicators.
percepcion <- lm(
  percepcioncorrupcion ~ puntuacion + pbi + apoyosoc + libertad +
    generosidad + esperanza,
  data = indice
)
summary(percepcion)
##
## Call:
## lm(formula = percepcioncorrupcion ~ puntuacion + pbi + apoyosoc +
## libertad + generosidad + esperanza, data = indice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17247 -0.05901 -0.01474 0.04397 0.33498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.08207 0.03828 -2.144 0.03364 *
## puntuacion 0.02049 0.01267 1.617 0.10801
## pbi 0.01869 0.03542 0.528 0.59848
## apoyosoc -0.05800 0.03362 -1.725 0.08655 .
## libertad 0.16812 0.05078 3.311 0.00117 **
## generosidad 0.23753 0.07113 3.339 0.00106 **
## esperanza 0.03145 0.05250 0.599 0.54998
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08168 on 149 degrees of freedom
## Multiple R-squared: 0.3217, Adjusted R-squared: 0.2944
## F-statistic: 11.78 on 6 and 149 DF, p-value: 8.687e-11
,
# Published Google Sheets CSV that is loaded into `ide`.
link2 <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTFLhCO2eqAth81eQsaT03RQx32n9GRxx6ixAR2trHCt4rWR6QIHk-Ig-b9VmXpBg/pub?output=csv"
# Read text columns as character. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
ide <- read.csv(link2, stringsAsFactors = FALSE)
str(ide)
## 'data.frame': 195 obs. of 13 variables:
## $ regionUbigeo : int 10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
## $ provinciaUbigeo: int 10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
## $ PROVINCIA : chr "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
## $ IDE : num 0.774 0.662 0.632 0.46 0.605 ...
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : chr "NO" "NO" "NO" "NO" ...
## $ capital : chr "SI" "NO" "NO" "NO" ...
## $ tamano : chr "Pequena" "Pequena" "Muy pequena" "Pequena" ...
# Convert the three categorical columns (positions 11 to 13) to factors. They
# are selected by name so the step does not break if the column order of the
# sheet changes.
cat_cols <- c("costa", "capital", "tamano")
ide[cat_cols] <- lapply(ide[cat_cols], as.factor)
# Keep only the rows that have no missing values.
ide <- ide[complete.cases(ide), ]
summary(ide)
## regionUbigeo provinciaUbigeo PROVINCIA IDE
## Min. : 10000 Min. : 10100 Length:195 Min. :0.4245
## 1st Qu.: 50000 1st Qu.: 50750 Class :character 1st Qu.:0.5806
## Median :100000 Median :101100 Mode :character Median :0.6367
## Mean :113795 Mean :114358 Mean :0.6519
## 3rd Qu.:170000 3rd Qu.:170250 3rd Qu.:0.7164
## Max. :250000 Max. :250400 Max. :0.9104
## identidad salud educacion saneamiento
## Min. :81.97 Min. : 2.598 Min. :44.03 Min. : 0.2351
## 1st Qu.:96.54 1st Qu.: 6.485 1st Qu.:74.06 1st Qu.:40.9610
## Median :97.90 Median :10.380 Median :82.49 Median :54.2229
## Mean :97.30 Mean :11.919 Mean :80.42 Mean :56.2641
## 3rd Qu.:98.87 3rd Qu.:14.744 3rd Qu.:89.02 3rd Qu.:72.4629
## Max. :99.50 Max. :44.741 Max. :99.50 Max. :99.5000
## electrificacion poblacion costa capital tamano
## Min. :33.83 Min. : 4251 NO:163 NO:170 Grande :15
## 1st Qu.:61.31 1st Qu.: 32188 SI: 32 SI: 25 Mediana :46
## Median :74.29 Median : 63039 Muy grande : 1
## Mean :72.11 Mean : 154543 Muy pequena:69
## 3rd Qu.:85.02 3rd Qu.: 121804 Pequena :64
## Max. :99.50 Max. :8481415
levels(ide$costa)
## [1] "NO" "SI"
levels(ide$capital)
## [1] "NO" "SI"
levels(ide$tamano)
## [1] "Grande" "Mediana" "Muy grande" "Muy pequena" "Pequena"
shapiro.test(ide$IDE)
##
## Shapiro-Wilk normality test
##
## data: ide$IDE
## W = 0.98349, p-value = 0.02167
NO NORMAL
library(DescTools)
Skew(ide$IDE)
## [1] 0.3486982
Asimetria positiva
shapiro.test(ide$identidad)
##
## Shapiro-Wilk normality test
##
## data: ide$identidad
## W = 0.72664, p-value < 2.2e-16
no normal
names(ide)
## [1] "regionUbigeo" "provinciaUbigeo" "PROVINCIA"
## [4] "IDE" "identidad" "salud"
## [7] "educacion" "saneamiento" "electrificacion"
## [10] "poblacion" "costa" "capital"
## [13] "tamano"
# Linear model of identidad on the remaining indicators and the three factors.
ident <- lm(
  identidad ~ salud + educacion + saneamiento + electrificacion +
    poblacion + costa + capital + tamano,
  data = ide
)
summary(ident)
##
## Call:
## lm(formula = identidad ~ salud + educacion + saneamiento + electrificacion +
## poblacion + costa + capital + tamano, data = ide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1774 -0.8996 0.2286 1.1466 4.0155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.530e+01 1.642e+00 51.935 < 2e-16 ***
## salud -3.668e-03 2.897e-02 -0.127 0.89938
## educacion 1.190e-01 1.689e-02 7.045 3.64e-11 ***
## saneamiento 2.971e-02 1.067e-02 2.785 0.00592 **
## electrificacion -8.407e-03 1.470e-02 -0.572 0.56819
## poblacion 9.183e-07 1.937e-06 0.474 0.63607
## costaSI 1.979e-01 4.812e-01 0.411 0.68139
## capitalSI 2.562e-01 6.391e-01 0.401 0.68898
## tamanoMediana 1.178e+00 9.838e-01 1.198 0.23259
## tamanoMuy grande -6.758e+00 1.543e+01 -0.438 0.66192
## tamanoMuy pequena 1.767e+00 1.185e+00 1.491 0.13777
## tamanoPequena 1.030e+00 1.120e+00 0.920 0.35884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.905 on 183 degrees of freedom
## Multiple R-squared: 0.4343, Adjusted R-squared: 0.4003
## F-statistic: 12.77 on 11 and 183 DF, p-value: < 2.2e-16
Ninguna variable con efecto inverso tiene efecto significativo
shapiro.test(ide$saneamiento)
##
## Shapiro-Wilk normality test
##
## data: ide$saneamiento
## W = 0.98626, p-value = 0.05485
ES NORMAL (p = 0.055 > 0.05): el analisis es parametrico.
# saneamiento compared across the two levels of costa.
h2 <- saneamiento ~ costa
# Median of saneamiento per group.
# NOTE(review): the accompanying text announces a t-test, but no t.test() call
# follows in the visible part of the file, and a t-test compares means rather
# than medians -- confirm whether t.test(h2, data = ide) and FUN = mean were
# intended.
aggregate(h2, data = ide, FUN = median)
## costa saneamiento
## 1 NO 51.3358
## 2 SI 84.3393
USAMOS PRUEBA T por ser PARAMETRICO
shapiro.test(ide$salud)
##
## Shapiro-Wilk normality test
##
## data: ide$salud
## W = 0.86893, p-value = 5.948e-12
no normal
# Linear model of salud on the remaining indicators and the three factors.
sal <- lm(
  salud ~ identidad + educacion + saneamiento + electrificacion +
    poblacion + costa + capital + tamano,
  data = ide
)
summary(sal)
##
## Call:
## lm(formula = salud ~ identidad + educacion + saneamiento + electrificacion +
## poblacion + costa + capital + tamano, data = ide)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3402 -3.2121 -0.2517 2.6801 19.4999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.176e+00 1.662e+01 0.251 0.8019
## identidad -2.388e-02 1.886e-01 -0.127 0.8994
## educacion 1.140e-02 4.858e-02 0.235 0.8147
## saneamiento 1.351e-01 2.594e-02 5.209 5.07e-07 ***
## electrificacion 2.556e-02 3.751e-02 0.681 0.4965
## poblacion 1.399e-06 4.946e-06 0.283 0.7776
## costaSI 2.171e+00 1.218e+00 1.783 0.0763 .
## capitalSI 7.036e+00 1.546e+00 4.551 9.73e-06 ***
## tamanoMediana -3.462e+00 2.507e+00 -1.381 0.1690
## tamanoMuy grande -1.258e+00 3.940e+01 -0.032 0.9746
## tamanoMuy pequena -3.255e-01 3.043e+00 -0.107 0.9149
## tamanoPequena -2.534e+00 2.859e+00 -0.887 0.3765
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.86 on 183 degrees of freedom
## Multiple R-squared: 0.5837, Adjusted R-squared: 0.5587
## F-statistic: 23.33 on 11 and 183 DF, p-value: < 2.2e-16
Las provincias que son capital de region tendrian mejor nivel de salud