# Download the happiness-index table from the Spanish Wikipedia article
# and inspect the structure of the resulting data frame.
library(htmltab)
IF <- htmltab(
  doc = "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad",
  which = '//*[@id="mw-content-text"]/div/table'
)
str(IF)
## 'data.frame': 156 obs. of 9 variables:
## $ № : chr "1" "2" "3" "4" ...
## $ País : chr "Finlandia" "Colombia" "Noruega" "Dinamarca" ...
## $ Puntuación : chr "7.633" "7.594" "7.560" "7.555" ...
## $ PIB per cápita : chr "1.305" "1.456" "1.372" "1.351" ...
## $ Apoyo social : chr "1.592" "1.582" "1.595" "1.590" ...
## $ Esperanza de años de vida saludable : chr "0.874" "0.873" "0.870" "0.868" ...
## $ Libertad para tomar decisiones vitales: chr "0.681" "0.686" "0.685" "0.683" ...
## $ Generosidad : chr "0.192" "0.286" "0.285" "0.284" ...
## $ Percepción de la corrupción : chr "0.393" "0.130" "0.410" "0.408" ...
# Keep only the seven indicator columns (positions 3 to 9); the rank and
# country columns in positions 1 and 2 are dropped.
IF <- IF[, 3:9]
# The scraped values arrive as character strings; convert every remaining
# column to numeric while keeping the data-frame structure.
IF[] <- lapply(IF, as.numeric)
library(stringr)
# Shorten each column name to its first word (e.g. "PIB per cápita" -> "PIB").
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
names(IF) <- str_split(names(IF), " ", simplify = TRUE)[, 1]
str(IF)
## 'data.frame': 156 obs. of 7 variables:
## $ Puntuación : num 7.63 7.59 7.56 7.55 7.5 ...
## $ PIB : num 1.3 1.46 1.37 1.35 1.34 ...
## $ Apoyo : num 1.59 1.58 1.59 1.59 1.64 ...
## $ Esperanza : num 0.874 0.873 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 ...
## $ Libertad : num 0.681 0.686 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 ...
## $ Generosidad: num 0.192 0.286 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 ...
## $ Percepción : num 0.393 0.13 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 ...
# Linear model with Percepción as the response and every other column of IF
# as a predictor; print the coefficient table and fit statistics.
regresion1 <- lm(Percepción ~ ., data = IF)
summary(regresion1)
##
## Call:
## lm(formula = Percepción ~ ., data = IF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17316 -0.05857 -0.01548 0.04287 0.33224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07672 0.03802 -2.018 0.04538 *
## Puntuación 0.01812 0.01274 1.422 0.15718
## PIB 0.02067 0.03569 0.579 0.56330
## Apoyo -0.05587 0.03374 -1.656 0.09980 .
## Esperanza 0.03400 0.05254 0.647 0.51855
## Libertad 0.17098 0.05110 3.346 0.00104 **
## Generosidad 0.23796 0.07141 3.332 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08184 on 149 degrees of freedom
## Multiple R-squared: 0.3191, Adjusted R-squared: 0.2917
## F-statistic: 11.64 on 6 and 149 DF, p-value: 1.145e-10
library(dlookr)
## Loading required package: mice
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:base':
##
## transform
# Run dlookr's normality test on the two variables to be correlated
# (PIB and Generosidad are columns 2 and 6 of IF).
normality(IF[, c("PIB", "Generosidad")])
## Warning: `cols` is now required.
## Please use `cols = c(statistic)`
## # A tibble: 2 x 4
## vars statistic p_value sample
## <chr> <dbl> <dbl> <dbl>
## 1 PIB 0.977 0.0118 156
## 2 Generosidad 0.960 0.000179 156
Ninguno de los p-valores es mayor a 0.05 (ninguna de las dos variables se distribuye normalmente); por ende, se va por el camino no paramétrico (Spearman).
# Scatter plot of PIB against Generosidad with a fitted regression line, its
# confidence band and the Spearman correlation coefficient printed on the plot.
# NOTE: frog is defined here but is not used by the plotting call below.
frog <- ~ PIB + Generosidad
library(ggplot2)
library(magrittr)
library(ggpubr)
NOOOO <- ggscatter(
  IF,
  x = "Generosidad",
  y = "PIB",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
NOOOO
Conclusión: no existe correlación ni significancia estadística (R = 0.00059, un valor muy cercano a 0).
# Run dlookr's normality test on the second pair of variables
# (Esperanza and Percepción are columns 4 and 7 of IF).
library(dlookr)
normality(IF[, c("Esperanza", "Percepción")])
## Warning: `cols` is now required.
## Please use `cols = c(statistic)`
## # A tibble: 2 x 4
## vars statistic p_value sample
## <chr> <dbl> <dbl> <dbl>
## 1 Esperanza 0.954 5.15e- 5 156
## 2 Percepción 0.814 8.49e-13 156
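La variable Percepción no es normal, y Esperanza tampoco lo es (ambos p-valores son menores a 0.05), por lo que se usa la correlación de Spearman.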
# Scatter plot of Esperanza against Percepción with a fitted regression line,
# its confidence band and the Spearman correlation coefficient on the plot.
# NOTE: nahnah is defined here but is not used by the plotting call below.
nahnah <- ~ Esperanza + Percepción
library(ggplot2)
library(magrittr)
library(ggpubr)
Grafica0 <- ggscatter(
  IF,
  x = "Percepción",
  y = "Esperanza",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
Grafica0
Conclusión: Hay correlación, pero esta no es significativa (R = 0.21).
# Linear model of Percepción on all remaining columns of IF, followed by its
# summary (coefficients, residual spread, R-squared and overall F test).
regresion2 <- lm(Percepción ~ ., data = IF)
summary(regresion2)
##
## Call:
## lm(formula = Percepción ~ ., data = IF)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17316 -0.05857 -0.01548 0.04287 0.33224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07672 0.03802 -2.018 0.04538 *
## Puntuación 0.01812 0.01274 1.422 0.15718
## PIB 0.02067 0.03569 0.579 0.56330
## Apoyo -0.05587 0.03374 -1.656 0.09980 .
## Esperanza 0.03400 0.05254 0.647 0.51855
## Libertad 0.17098 0.05110 3.346 0.00104 **
## Generosidad 0.23796 0.07141 3.332 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08184 on 149 degrees of freedom
## Multiple R-squared: 0.3191, Adjusted R-squared: 0.2917
## F-statistic: 11.64 on 6 and 149 DF, p-value: 1.145e-10
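Una variable (Apoyo) tiene efecto inverso, es decir, un coeficiente negativo (-0.05587), aunque no es significativo al nivel 0.05 (p = 0.0998).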
# Read the published Google Sheets CSV into IDE. Empty cells are treated as
# missing values and text columns are kept as character vectors.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
linkA <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQAgq73wRBfVslN7zarqk50rY3a3WjfVKCoNFhF7rZdvpktMIDNHAnAI8YXL1ZMnkJFLiYnnYYOfeDk/pub?output=csv"
IDE <- read.csv(linkA, stringsAsFactors = FALSE, na.strings = "")
str(IDE)
## 'data.frame': 195 obs. of 13 variables:
## $ regionUbigeo : int 10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
## $ provinciaUbigeo: int 10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
## $ PROVINCIA : chr "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
## $ IDE : num 0.774 0.662 0.632 0.46 0.605 ...
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : chr "NO" "NO" "NO" "NO" ...
## $ capital : chr "SI" "NO" "NO" "NO" ...
## $ tamano : chr "Pequena" "Pequena" "Muy pequena" "Pequena" ...
# Keep columns 5 to 13 (identidad through tamano); the ubigeo codes, the
# province name and the IDE index in columns 1 to 4 are dropped.
IDE <- IDE[, 5:13]
str(IDE)
## 'data.frame': 195 obs. of 9 variables:
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : chr "NO" "NO" "NO" "NO" ...
## $ capital : chr "SI" "NO" "NO" "NO" ...
## $ tamano : chr "Pequena" "Pequena" "Muy pequena" "Pequena" ...
Modificar variable tamaño
# Inspect the raw tamano column before recoding it.
str(IDE$tamano)
## chr [1:195] "Pequena" "Pequena" "Muy pequena" "Pequena" "Pequena" ...
# Recode tamano as an ordered factor whose levels follow the order given in W.
W <- c("Muy pequena", "Pequena", "Mediana", "Grande")
IDE$tamano <- factor(IDE$tamano, levels = W, ordered = TRUE)
str(IDE)
## 'data.frame': 195 obs. of 9 variables:
## $ identidad : num 98.6 94.6 97.5 86.2 96.2 ...
## $ salud : num 25.45 14.61 9.01 8.56 12.42 ...
## $ educacion : num 91.5 79.8 76.4 52.2 74.7 ...
## $ saneamiento : num 70.3 64.5 54.8 37.7 43.3 ...
## $ electrificacion: num 84 67.9 72.2 39.5 67.4 ...
## $ poblacion : int 54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
## $ costa : chr "NO" "NO" "NO" "NO" ...
## $ capital : chr "SI" "NO" "NO" "NO" ...
## $ tamano : Ord.factor w/ 4 levels "Muy pequena"<..: 2 2 1 2 2 1 3 3 1 1 ...
# Linear model with identidad as the response and every other column of IDE
# as a predictor; print the coefficient table and fit statistics.
regresion3 <- lm(identidad ~ ., data = IDE)
summary(regresion3)
##
## Call:
## lm(formula = identidad ~ ., data = IDE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.1774 -0.9021 0.2405 1.1481 4.0155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.629e+01 1.165e+00 74.085 < 2e-16 ***
## salud -3.668e-03 2.897e-02 -0.127 0.89938
## educacion 1.190e-01 1.689e-02 7.045 3.64e-11 ***
## saneamiento 2.971e-02 1.067e-02 2.785 0.00592 **
## electrificacion -8.407e-03 1.470e-02 -0.572 0.56819
## poblacion 9.183e-07 1.937e-06 0.474 0.63607
## costaSI 1.979e-01 4.812e-01 0.411 0.68139
## capitalSI 2.562e-01 6.391e-01 0.401 0.68898
## tamano.L -1.152e+00 8.347e-01 -1.380 0.16918
## tamano.Q -2.210e-01 4.948e-01 -0.447 0.65567
## tamano.C -4.943e-01 3.030e-01 -1.632 0.10450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.905 on 183 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4323, Adjusted R-squared: 0.4012
## F-statistic: 13.93 on 10 and 183 DF, p-value: < 2.2e-16
Educación tiene un efecto más significativo (p = 3.64e-11, código '***') que saneamiento (p = 0.00592, código '**').
# Linear model with salud as the response and every other column of IDE
# as a predictor; print the coefficient table and fit statistics.
regresion4 <- lm(salud ~ ., data = IDE)
summary(regresion4)
##
## Call:
## lm(formula = salud ~ ., data = IDE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.3402 -3.2217 -0.2625 2.6839 19.4999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.596e+00 1.654e+01 0.157 0.8755
## identidad -2.388e-02 1.886e-01 -0.127 0.8994
## educacion 1.140e-02 4.858e-02 0.235 0.8147
## saneamiento 1.351e-01 2.594e-02 5.209 5.07e-07 ***
## electrificacion 2.556e-02 3.751e-02 0.681 0.4965
## poblacion 1.399e-06 4.946e-06 0.283 0.7776
## costaSI 2.171e+00 1.218e+00 1.783 0.0763 .
## capitalSI 7.036e+00 1.546e+00 4.551 9.73e-06 ***
## tamano.L 1.093e-02 2.141e+00 0.005 0.9959
## tamano.Q 2.836e+00 1.246e+00 2.276 0.0240 *
## tamano.C 6.952e-01 7.770e-01 0.895 0.3721
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.86 on 183 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5535, Adjusted R-squared: 0.5291
## F-statistic: 22.68 on 10 and 183 DF, p-value: < 2.2e-16
Saneamiento y Capital tienen un efecto significativo al nivel más exigente (código '***', p < 0.001).
# Mean of saneamiento within each level of costa (NO / SI).
joooo <- saneamiento ~ costa
aggregate(joooo, data = IDE, FUN = mean)
## costa saneamiento
## 1 NO 50.97556
## 2 SI 83.20243
# Run a Shapiro-Wilk test on x and return a length-2 numeric vector holding
# the test statistic (named "W") followed by the p-value.
dadadad <- function(x) {
  sw <- shapiro.test(x)
  c(sw$statistic, sw$p.value)
}
# Apply the Shapiro-Wilk helper to saneamiento within each costa group, then
# print the group label next to its statistic and p-value as a table.
resultado <- aggregate(joooo, data = IDE, FUN = dadadad)
library(knitr)
# The second column of resultado holds the two values returned per group;
# spread them into a two-column data frame with descriptive names.
shapiroTest <- setNames(
  as.data.frame(resultado[, 2]),
  c("SW_Statistic", "Probabilidad")
)
kable(cbind(resultado[1], shapiroTest))
| costa | SW_Statistic | Probabilidad |
|---|---|---|
| NO | 0.9949525 | 0.8529651 |
| SI | 0.9580179 | 0.2422331 |
Como los p-valores de Shapiro-Wilk de ambos grupos son mayores a 0.05 (no se rechaza la normalidad), corresponde usar la prueba t para comparar las medias.