# Scrape the happiness-index table from the Spanish Wikipedia article.
library(htmltab)
FELIZ <- htmltab(
  doc = "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad",
  which = '//*[@id="mw-content-text"]/div/table'
)
# Inspect the column names and types of the scraped table.
str(FELIZ)
## 'data.frame': 156 obs. of 9 variables:
## $ № : chr "1" "2" "3" "4" ...
## $ País : chr "Finlandia" "Colombia" "Noruega" "Dinamarca" ...
## $ Puntuación : chr "7.633" "7.594" "7.560" "7.555" ...
## $ PIB per cápita : chr "1.305" "1.456" "1.372" "1.351" ...
## $ Apoyo social : chr "1.592" "1.582" "1.595" "1.590" ...
## $ Esperanza de años de vida saludable : chr "0.874" "0.873" "0.870" "0.868" ...
## $ Libertad para tomar decisiones vitales: chr "0.681" "0.686" "0.685" "0.683" ...
## $ Generosidad : chr "0.192" "0.286" "0.285" "0.284" ...
## $ Percepción de la corrupción : chr "0.393" "0.130" "0.410" "0.408" ...
# Keep only columns 3 to 9, dropping the first two columns of the scraped table.
FELIZ <- FELIZ[, 3:9]
# Convert every remaining column from character to numeric.
# `FELIZ[] <-` keeps the data.frame structure while replacing all columns,
# so no column count needs to be hard-coded.
FELIZ[] <- lapply(FELIZ, as.numeric)
# Shorten each column name to its first word (the text before the first space).
library(stringr)
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
names(FELIZ) <- str_split(names(FELIZ), " ", simplify = TRUE)[, 1]
STR:
# Show the structure of FELIZ (column names and types) at this point.
str(FELIZ)
## 'data.frame': 156 obs. of 7 variables:
## $ Puntuación : num 7.63 7.59 7.56 7.55 7.5 ...
## $ PIB : num 1.3 1.46 1.37 1.35 1.34 ...
## $ Apoyo : num 1.59 1.58 1.59 1.59 1.64 ...
## $ Esperanza : num 0.874 0.873 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 ...
## $ Libertad : num 0.681 0.686 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 ...
## $ Generosidad: num 0.192 0.286 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 ...
## $ Percepción : num 0.393 0.13 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 ...
Pregunta 1
# Linear model with Percepción as the response and every other column of
# FELIZ as a predictor.
regresion <- lm(Percepción ~ ., data = FELIZ)
# Print the coefficient table and the fit statistics.
summary(regresion)
##
## Call:
## lm(formula = Percepción ~ ., data = FELIZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17316 -0.05857 -0.01548 0.04287 0.33224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07672 0.03802 -2.018 0.04538 *
## Puntuación 0.01812 0.01274 1.422 0.15718
## PIB 0.02067 0.03569 0.579 0.56330
## Apoyo -0.05587 0.03374 -1.656 0.09980 .
## Esperanza 0.03400 0.05254 0.647 0.51855
## Libertad 0.17098 0.05110 3.346 0.00104 **
## Generosidad 0.23796 0.07141 3.332 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08184 on 149 degrees of freedom
## Multiple R-squared: 0.3191, Adjusted R-squared: 0.2917
## F-statistic: 11.64 on 6 and 149 DF, p-value: 1.145e-10
Pregunta 2
Análisis bivariado Normalidad
library(dlookr)
## Loading required package: mice
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
##
## Attaching package: 'dlookr'
## The following object is masked from 'package:base':
##
## transform
Ninguna es mayor a 0.05; por ende, se va por el camino no paramétrico (Spearman).
Corroborar hipótesis: H0 = No hay correlación
# One-sided formula naming the two variables being compared.
frog <- formula(~ PIB + Generosidad)
library(ggplot2)
library(magrittr)
library(ggpubr)
# Scatter plot of PIB (y) against Generosidad (x) with a regression line,
# its confidence band and the Spearman correlation coefficient.
GAAAA <- ggscatter(
  FELIZ,
  x = "Generosidad",
  y = "PIB",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
# Display the plot.
GAAAA
Conclusión: No hay correlación ni significancia (R = 0.00059, está muy cerca de 0).
Pregunta 3 normalidad
library(dlookr)
# Run dlookr's normality test on the Esperanza and Percepción columns
# (columns 4 and 7 of FELIZ), selected here by name.
normality(FELIZ[, c("Esperanza", "Percepción")])
## Warning: `cols` is now required.
## Please use `cols = c(statistic)`
## # A tibble: 2 x 4
## vars statistic p_value sample
## <chr> <dbl> <dbl> <dbl>
## 1 Esperanza 0.954 5.15e- 5 156
## 2 Percepción 0.814 8.49e-13 156
La variable Percepción no es normal (p < 0.05); Esperanza tampoco lo es.
Corroborar hipótesis: H0 = No hay correlación
# One-sided formula naming the two variables being compared.
soda <- formula(~ Esperanza + Percepción)
library(ggplot2)
library(magrittr)
library(ggpubr)
# Scatter plot of Esperanza (y) against Percepción (x) with a regression line,
# its confidence band and the Spearman correlation coefficient.
Grafica0 <- ggscatter(
  FELIZ,
  x = "Percepción",
  y = "Esperanza",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)
# Display the plot.
Grafica0
Conclusión: Hay correlación, pero no es significativa (R = 0.21).
Pregunta 4
# Linear model with Percepción as the response and every other column of
# FELIZ as a predictor.
regresion2 <- lm(Percepción ~ ., data = FELIZ)
# Print the coefficient table and the fit statistics.
summary(regresion2)
##
## Call:
## lm(formula = Percepción ~ ., data = FELIZ)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17316 -0.05857 -0.01548 0.04287 0.33224
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.07672 0.03802 -2.018 0.04538 *
## Puntuación 0.01812 0.01274 1.422 0.15718
## PIB 0.02067 0.03569 0.579 0.56330
## Apoyo -0.05587 0.03374 -1.656 0.09980 .
## Esperanza 0.03400 0.05254 0.647 0.51855
## Libertad 0.17098 0.05110 3.346 0.00104 **
## Generosidad 0.23796 0.07141 3.332 0.00109 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08184 on 149 degrees of freedom
## Multiple R-squared: 0.3191, Adjusted R-squared: 0.2917
## F-statistic: 11.64 on 6 and 149 DF, p-value: 1.145e-10
Según la regresión, hay una variable que tiene efecto inverso.
# URL of a published Google Sheets document exported as CSV.
linkA <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vRcJpnJqH9VzTXl4NMv0zX45yRkXeMNST3fkSfGFCpUTh0S-dSzRtUj7CJqAzqMUE5r6tKQRZzdKq9V/pub?gid=1802780199&single=true&output=csv"
# Read the CSV, keeping text columns as character and treating empty cells
# as NA. FALSE is spelled out: `F` is an ordinary variable that can be
# reassigned.
EST <- read.csv(linkA, stringsAsFactors = FALSE, na.strings = "")
# Inspect the column names and types of the loaded data.
str(EST)
## 'data.frame': 3414 obs. of 6 variables:
## $ FECHA : chr "01/01/2019 0:01" "01/01/2019 0:07" "01/01/2019 0:17" "01/01/2019 0:25" ...
## $ DÍA : int 3 3 3 3 3 3 3 3 3 3 ...
## $ MES : int 1 1 1 1 1 1 1 1 1 1 ...
## $ MODALIDAD: chr NA "Patrullaje Disuasivo" NA NA ...
## $ MEDIO : chr "TELÉFONO" "TELÉFONO" "TELÉFONO" "TELÉFONO" ...
## $ DIRECCIÓN: chr "AV. DOS DE MAYO N° 0864, SAN ISIDRO" "CA. GARCIA, GODOFREDO N° 0490, SAN ISIDRO" "CA. LOS ROBLES N° 210234, SAN ISIDRO" "CA. BURGOS N° 0179, SAN ISIDRO" ...