FINAL.utf8.md

PRÁCTICA FINAL

Análisis del Índice Global de Felicidad

library(htmltab)

# Spanish Wikipedia article that contains the happiness index table.
LINKWIKIP <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"

# XPath expression used to locate the table inside the article body.
TablaF <- '//*[@id="mw-content-text"]/div/table'

# Download the page and parse the selected table into a data frame.
Felicidad <- htmltab(doc = LINKWIKIP, which = TablaF)

# List the column headers exactly as they were scraped.
names(Felicidad)

## [1] "№"                                     
## [2] "País"                                  
## [3] "Puntuación"                            
## [4] "PIB per cápita"                        
## [5] "Apoyo social"                          
## [6] "Esperanza de años de vida saludable"   
## [7] "Libertad para tomar decisiones vitales"
## [8] "Generosidad"                           
## [9] "Percepción de la corrupción"

str(Felicidad)

## 'data.frame':    156 obs. of  9 variables:
##  $ №                                     : chr  "1" "2" "3" "4" ...
##  $ País                                  : chr  "Finlandia" "Colombia" "Noruega" "Dinamarca" ...
##  $ Puntuación                            : chr  "7.633" "7.594" "7.560" "7.555" ...
##  $ PIB per cápita                        : chr  "1.305" "1.456" "1.372" "1.351" ...
##  $ Apoyo social                          : chr  "1.592" "1.582" "1.595" "1.590" ...
##  $ Esperanza de años de vida saludable   : chr  "0.874" "0.873" "0.870" "0.868" ...
##  $ Libertad para tomar decisiones vitales: chr  "0.681" "0.686" "0.685" "0.683" ...
##  $ Generosidad                           : chr  "0.192" "0.286" "0.285" "0.284" ...
##  $ Percepción de la corrupción           : chr  "0.393" "0.130" "0.410" "0.408" ...

# Columns 3 to 9 are scraped as character strings; convert them to numeric.
Felicidad[, 3:9] <- lapply(Felicidad[, 3:9], as.numeric)

# Drop the row-number column.
Felicidad$`№` <- NULL

library(stringr)
# Keep only the first word of every column name.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
names(Felicidad) <- str_split(names(Felicidad), " ", simplify = TRUE)[, 1]

# Remove every non-ASCII character from the names. Accented letters are
# deleted rather than transliterated (e.g. "País" becomes "Pas"); the model
# formulas further down refer to these stripped names, so they are kept as is.
names(Felicidad) <- str_replace_all(names(Felicidad), "[^[:ascii:]]", "")

str(Felicidad)

## 'data.frame':    156 obs. of  8 variables:
##  $ Pas        : chr  "Finlandia" "Colombia" "Noruega" "Dinamarca" ...
##  $ Puntuacin  : num  7.63 7.59 7.56 7.55 7.5 ...
##  $ PIB        : num  1.3 1.46 1.37 1.35 1.34 ...
##  $ Apoyo      : num  1.59 1.58 1.59 1.59 1.64 ...
##  $ Esperanza  : num  0.874 0.873 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 ...
##  $ Libertad   : num  0.681 0.686 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 ...
##  $ Generosidad: num  0.192 0.286 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 ...
##  $ Percepcin  : num  0.393 0.13 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 ...

summary(Felicidad)

##      Pas              Puntuacin          PIB             Apoyo      
##  Length:156         Min.   :2.905   Min.   :0.0000   Min.   :0.000  
##  Class :character   1st Qu.:4.454   1st Qu.:0.6162   1st Qu.:1.067  
##  Mode  :character   Median :5.378   Median :0.9495   Median :1.255  
##                     Mean   :5.384   Mean   :0.8941   Mean   :1.214  
##                     3rd Qu.:6.168   3rd Qu.:1.2025   3rd Qu.:1.466  
##                     Max.   :7.633   Max.   :2.0960   Max.   :1.644  
##    Esperanza         Libertad       Generosidad       Percepcin      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.4223   1st Qu.:0.3560   1st Qu.:0.1108   1st Qu.:0.05175  
##  Median :0.6510   Median :0.4870   Median :0.1750   Median :0.08200  
##  Mean   :0.5989   Mean   :0.4555   Mean   :0.1821   Mean   :0.11306  
##  3rd Qu.:0.7820   3rd Qu.:0.5800   3rd Qu.:0.2422   3rd Qu.:0.13650  
##  Max.   :1.0300   Max.   :0.7240   Max.   :0.5980   Max.   :0.45700

Variable dependiente: Esperanza de años de vida saludable

# Linear model with Esperanza as the response and five predictors.
Regresion <- lm(
  Esperanza ~ PIB + Apoyo + Libertad + Generosidad + Percepcin,
  data = Felicidad
)

# Coefficient table and fit statistics.
summary(Regresion)

## 
## Call:
## lm(formula = Esperanza ~ PIB + Apoyo + Libertad + Generosidad + 
##     Percepcin, data = Felicidad)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44391 -0.06484  0.01431  0.07939  0.24825 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.003605   0.047854  -0.075  0.94004    
## PIB          0.445871   0.036908  12.081  < 2e-16 ***
## Apoyo        0.139466   0.048666   2.866  0.00476 ** 
## Libertad     0.046434   0.078767   0.590  0.55641    
## Generosidad -0.002610   0.116482  -0.022  0.98216    
## Percepcin    0.122647   0.128416   0.955  0.34108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1295 on 150 degrees of freedom
## Multiple R-squared:  0.7373, Adjusted R-squared:  0.7285 
## F-statistic: 84.18 on 5 and 150 DF,  p-value: < 2.2e-16

Solo son significativas y directas PIB (coeficiente de 0.446, significativo al 0.001) y Apoyo (coeficiente de 0.139, significativo al 0.01).

Variable dependiente: Percepción de la corrupción

# Linear model with Percepcin (perception of corruption) as the response.
Regresion2 <- lm(
  Percepcin ~ PIB + Apoyo + Libertad + Generosidad + Esperanza,
  data = Felicidad
)

# Coefficient table and fit statistics.
summary(Regresion2)

## 
## Call:
## lm(formula = Percepcin ~ PIB + Apoyo + Libertad + Generosidad + 
##     Esperanza, data = Felicidad)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.18413 -0.05413 -0.01276  0.04097  0.31134 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.04356    0.03013  -1.446 0.150266    
## PIB          0.04141    0.03269   1.267 0.207190    
## Apoyo       -0.03841    0.03153  -1.218 0.225052    
## Libertad     0.19913    0.04727   4.213 4.34e-05 ***
## Generosidad  0.25261    0.07090   3.563 0.000492 ***
## Esperanza    0.04928    0.05160   0.955 0.341075    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08212 on 150 degrees of freedom
## Multiple R-squared:  0.3098, Adjusted R-squared:  0.2868 
## F-statistic: 13.47 on 5 and 150 DF,  p-value: 7.529e-11

library(dlookr)

## Loading required package: mice

## Loading required package: lattice

## 
## Attaching package: 'mice'

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Registered S3 methods overwritten by 'car':
##   method                          from
##   influence.merMod                lme4
##   cooks.distance.influence.merMod lme4
##   dfbeta.influence.merMod         lme4
##   dfbetas.influence.merMod        lme4

## Warning in fun(libname, pkgname): couldn't connect to display ":0"

## 
## Attaching package: 'dlookr'

## The following object is masked from 'package:base':
## 
##     transform

normality(Felicidad[,c(2:8)])

## Warning: `cols` is now required.
## Please use `cols = c(statistic)`

## # A tibble: 7 x 4
##   vars        statistic  p_value sample
##   <chr>           <dbl>    <dbl>  <dbl>
## 1 Puntuacin       0.984 6.48e- 2    156
## 2 PIB             0.977 1.18e- 2    156
## 3 Apoyo           0.917 8.71e- 8    156
## 4 Esperanza       0.954 5.15e- 5    156
## 5 Libertad        0.946 1.13e- 5    156
## 6 Generosidad     0.960 1.79e- 4    156
## 7 Percepcin       0.814 8.49e-13    156

Generosidad y PIB

# One-sided formula naming the two variables to correlate.
R <- formula(~ Generosidad + PIB)

# Spearman rank correlation between the two variables.
# The method name is written in full instead of relying on partial matching,
# and FALSE replaces the reassignable shorthand `F`; exact = FALSE asks
# cor.test not to compute an exact p-value.
cor.test(R, data = Felicidad, method = "spearman", exact = FALSE)

## 
##  Spearman's rank correlation rho
## 
## data:  Generosidad and PIB
## S = 628978, p-value = 0.9417
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##         rho 
## 0.005898693

library(ggpubr)

## Loading required package: ggplot2

## Loading required package: magrittr

# Scatter plot of PIB against Generosidad with a fitted regression line,
# its confidence interval, and the Spearman coefficient shown on the plot.
ggscatter(
  Felicidad,
  x = "Generosidad",
  y = "PIB",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman"
)

# Published Google Sheets CSV export that is read into `Tabla` below.
IDE <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTChEH9LIjZeT4v3H-X_m2u66UqR0vG4AB0oWRYVzq1TpGSFrY7uCCZYwb7mCWhl79kfSYiKBQYp3-L/pub?gid=1780081736&single=true&output=csv"

# Read the CSV keeping text columns as character vectors.
# FALSE is spelled out because `F` is an ordinary variable that can be reassigned.
Tabla <- read.csv(IDE, stringsAsFactors = FALSE)

# List the column names of the loaded table.
names(Tabla)

##  [1] "regionUbigeo"    "provinciaUbigeo" "PROVINCIA"      
##  [4] "IDE"             "identidad"       "salud"          
##  [7] "educacion"       "saneamiento"     "electrificacion"
## [10] "poblacion"       "costa"           "capital"        
## [13] "tamano"

str(Tabla)

## 'data.frame':    195 obs. of  13 variables:
##  $ regionUbigeo   : int  10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
##  $ provinciaUbigeo: int  10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
##  $ PROVINCIA      : chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ IDE            : num  0.774 0.662 0.632 0.46 0.605 ...
##  $ identidad      : num  98.6 94.6 97.5 86.2 96.2 ...
##  $ salud          : num  25.45 14.61 9.01 8.56 12.42 ...
##  $ educacion      : num  91.5 79.8 76.4 52.2 74.7 ...
##  $ saneamiento    : num  70.3 64.5 54.8 37.7 43.3 ...
##  $ electrificacion: num  84 67.9 72.2 39.5 67.4 ...
##  $ poblacion      : int  54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
##  $ costa          : chr  "NO" "NO" "NO" "NO" ...
##  $ capital        : chr  "SI" "NO" "NO" "NO" ...
##  $ tamano         : chr  "Pequena" "Pequena" "Muy pequena" "Pequena" ...

# Convert the three categorical columns to factors. They are selected by name
# (they sit at positions 11 to 13 in the table as loaded) so that the step
# does not silently hit other columns if the column order ever changes.
Tabla[, c("costa", "capital", "tamano")] <- lapply(
  Tabla[, c("costa", "capital", "tamano")],
  as.factor
)

# Check the resulting column types.
str(Tabla)

## 'data.frame':    195 obs. of  13 variables:
##  $ regionUbigeo   : int  10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
##  $ provinciaUbigeo: int  10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
##  $ PROVINCIA      : chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ IDE            : num  0.774 0.662 0.632 0.46 0.605 ...
##  $ identidad      : num  98.6 94.6 97.5 86.2 96.2 ...
##  $ salud          : num  25.45 14.61 9.01 8.56 12.42 ...
##  $ educacion      : num  91.5 79.8 76.4 52.2 74.7 ...
##  $ saneamiento    : num  70.3 64.5 54.8 37.7 43.3 ...
##  $ electrificacion: num  84 67.9 72.2 39.5 67.4 ...
##  $ poblacion      : int  54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
##  $ costa          : Factor w/ 2 levels "NO","SI": 1 1 1 1 1 1 1 1 1 1 ...
##  $ capital        : Factor w/ 2 levels "NO","SI": 2 1 1 1 1 1 1 2 1 1 ...
##  $ tamano         : Factor w/ 5 levels "Grande","Mediana",..: 5 5 4 5 5 4 2 2 4 4 ...

# Formula reused by the group summaries: saneamiento split by costa.
P <- formula(saneamiento ~ costa)

# Mean of saneamiento for each level of costa.
aggregate(P, data = Tabla, FUN = mean)

##   costa saneamiento
## 1    NO    50.97556
## 2    SI    83.20243

# Run the Shapiro-Wilk normality test on a numeric vector.
#
# x: numeric vector passed to shapiro.test().
# Returns a length-2 numeric vector: the test statistic followed by the
# p-value.
normalidadTest <- function(x) {
  prueba <- shapiro.test(x)
  c(prueba$statistic, prueba$p.value)
}

# Apply normalidadTest to saneamiento within each level of costa.
resultado <- aggregate(P, Tabla, FUN = normalidadTest)

library(knitr)

# The second column of `resultado` holds the statistic and the p-value for
# each group; turn it into a data frame with readable column names and print
# it next to the grouping column.
shapiroTest <- setNames(
  as.data.frame(resultado[, 2]),
  c("SW_Statistic", "Probabilidad")
)
kable(cbind(resultado[1], shapiroTest))

costa	SW_Statistic	Probabilidad
NO	0.9949525	0.8529651
SI	0.9580179	0.2422331

# Linear model with identidad as the response and five numeric predictors.
Regresion3 <- lm(
  identidad ~ salud + educacion + saneamiento + electrificacion + poblacion,
  data = Tabla
)

# Coefficient table and fit statistics.
summary(Regresion3)

## 
## Call:
## lm(formula = identidad ~ salud + educacion + saneamiento + electrificacion + 
##     poblacion, data = Tabla)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3984  -0.8723   0.1998   1.1178   3.7027 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      8.657e+01  1.002e+00  86.397  < 2e-16 ***
## salud           -1.341e-03  2.558e-02  -0.052   0.9582    
## educacion        1.251e-01  1.662e-02   7.524  2.1e-12 ***
## saneamiento      2.852e-02  9.816e-03   2.906   0.0041 ** 
## electrificacion -1.259e-02  1.437e-02  -0.876   0.3819    
## poblacion       -3.822e-08  2.386e-07  -0.160   0.8729    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.907 on 189 degrees of freedom
## Multiple R-squared:  0.4143, Adjusted R-squared:  0.3988 
## F-statistic: 26.74 on 5 and 189 DF,  p-value: < 2.2e-16

# Linear model with salud as the response; capital and costa are factor
# predictors alongside the numeric ones.
Rgresion4 <- lm(
  salud ~ educacion + saneamiento + electrificacion + identidad +
    poblacion + capital + costa,
  data = Tabla
)

# Coefficient table and fit statistics.
summary(Rgresion4)

## 
## Call:
## lm(formula = salud ~ educacion + saneamiento + electrificacion + 
##     identidad + poblacion + capital + costa, data = Tabla)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3437  -3.1715  -0.5768   2.6857  20.8256 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -2.895e+00  1.675e+01  -0.173   0.8629    
## educacion        2.766e-02  4.964e-02   0.557   0.5780    
## saneamiento      1.286e-01  2.642e-02   4.869 2.38e-06 ***
## electrificacion  1.416e-02  3.838e-02   0.369   0.7126    
## identidad        2.877e-02  1.908e-01   0.151   0.8803    
## poblacion        1.469e-06  6.304e-07   2.329   0.0209 *  
## capitalSI        7.499e+00  1.268e+00   5.916 1.54e-08 ***
## costaSI          2.090e+00  1.222e+00   1.710   0.0889 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.996 on 187 degrees of freedom
## Multiple R-squared:  0.5504, Adjusted R-squared:  0.5336 
## F-statistic:  32.7 on 7 and 187 DF,  p-value: < 2.2e-16

El saneamiento tiene una relación directa con la salud y su coeficiente es significativo al 0.001.