https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad

https://docs.google.com/spreadsheets/d/e/2PACX-1vTFLhCO2eqAth81eQsaT03RQx32n9GRxx6ixAR2trHCt4rWR6QIHk-Ig-b9VmXpBg/pub?output=csv

parte 1

# Scrape the happiness-index table from the Spanish Wikipedia article.
library(htmltab)

link <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
# XPath handed to htmltab() to select which table to extract.
path <- "//*/div/table/tbody"
indice <- htmltab(doc = link, which = path)

# Show the column types and first values of the freshly scraped table.
str(indice)

## 'data.frame':    156 obs. of  9 variables:
##  $ №                                     : chr  "1" "2" "3" "4" ...
##  $ País                                  : chr  "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
##  $ Puntuación                            : chr  "7.633" "7.560" "7.555" "7.495" ...
##  $ PIB per cápita                        : chr  "1.305" "1.372" "1.351" "1.343" ...
##  $ Apoyo social                          : chr  "1.592" "1.595" "1.590" "1.644" ...
##  $ Esperanza de años de vida saludable   : chr  "0.874" "0.870" "0.868" "0.914" ...
##  $ Libertad para tomar decisiones vitales: chr  "0.681" "0.685" "0.683" "0.677" ...
##  $ Generosidad                           : chr  "0.192" "0.285" "0.284" "0.353" ...
##  $ Percepción de la corrupción           : chr  "0.393" "0.410" "0.408" "0.138" ...

# List the original column headers before they are renamed.
names(indice)

## [1] "№"                                     
## [2] "País"                                  
## [3] "Puntuación"                            
## [4] "PIB per cápita"                        
## [5] "Apoyo social"                          
## [6] "Esperanza de años de vida saludable"   
## [7] "Libertad para tomar decisiones vitales"
## [8] "Generosidad"                           
## [9] "Percepción de la corrupción"

# Short ASCII replacements for the nine scraped headers, in the same order.
newN <- c(
  "n", "pais", "puntuacion", "pbi", "apoyosoc", "esperanza",
  "libertad", "generosidad", "percepcioncorrupcion"
)
names(indice) <- newN

# Drop the rank column.
indice$n <- NULL

# Every column arrives as character; convert columns 2 to 8 (all except
# `pais`) to numeric.
indice[2:8] <- lapply(indice[2:8], as.numeric)

str(indice)

## 'data.frame':    156 obs. of  8 variables:
##  $ pais                : chr  "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
##  $ puntuacion          : num  7.63 7.56 7.55 7.5 7.49 ...
##  $ pbi                 : num  1.3 1.37 1.35 1.34 1.42 ...
##  $ apoyosoc            : num  1.59 1.59 1.59 1.64 1.55 ...
##  $ esperanza           : num  0.874 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 0.91 ...
##  $ libertad            : num  0.681 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 0.647 ...
##  $ generosidad         : num  0.192 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 0.361 ...
##  $ percepcioncorrupcion: num  0.393 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 0.302 ...

# Per-column descriptive statistics of the cleaned table.
summary(indice)

##      pais             puntuacion         pbi            apoyosoc    
##  Length:156         Min.   :2.905   Min.   :0.0000   Min.   :0.000  
##  Class :character   1st Qu.:4.454   1st Qu.:0.6162   1st Qu.:1.067  
##  Mode  :character   Median :5.378   Median :0.9495   Median :1.255  
##                     Mean   :5.376   Mean   :0.8941   Mean   :1.214  
##                     3rd Qu.:6.168   3rd Qu.:1.2025   3rd Qu.:1.466  
##                     Max.   :7.633   Max.   :2.0960   Max.   :1.644  
##    esperanza         libertad       generosidad     percepcioncorrupcion
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000     
##  1st Qu.:0.4223   1st Qu.:0.3560   1st Qu.:0.1108   1st Qu.:0.05175     
##  Median :0.6510   Median :0.4870   Median :0.1750   Median :0.08200     
##  Mean   :0.5989   Mean   :0.4555   Mean   :0.1821   Mean   :0.11306     
##  3rd Qu.:0.7820   3rd Qu.:0.5800   3rd Qu.:0.2422   3rd Qu.:0.13650     
##  Max.   :1.0300   Max.   :0.7240   Max.   :0.5980   Max.   :0.45700

# Shapiro-Wilk normality test on pbi (p < 0.05 rejects normality).
shapiro.test(indice$pbi)

## 
##  Shapiro-Wilk normality test
## 
## data:  indice$pbi
## W = 0.97748, p-value = 0.01176

es NO NORMAL

# Linear model with pbi as the response and the other six indicators as
# predictors.
# NOTE(review): the object name `all` shadows base::all(); it is kept because
# other code may refer to it.
all <- lm(
  pbi ~ puntuacion + esperanza + apoyosoc + libertad + generosidad +
    percepcioncorrupcion,
  data = indice
)

summary(all)

## 
## Call:
## lm(formula = pbi ~ puntuacion + esperanza + apoyosoc + libertad + 
##     generosidad + percepcioncorrupcion, data = indice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40453 -0.09228 -0.01632  0.08688  0.94715 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.31959    0.08590  -3.720 0.000281 ***
## puntuacion            0.13963    0.02724   5.126 9.04e-07 ***
## esperanza             0.82451    0.10094   8.168 1.23e-13 ***
## apoyosoc              0.08478    0.07815   1.085 0.279731    
## libertad             -0.22551    0.12017  -1.877 0.062526 .  
## generosidad          -0.23204    0.16935  -1.370 0.172693    
## percepcioncorrupcion  0.09981    0.18913   0.528 0.598485    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1887 on 149 degrees of freedom
## Multiple R-squared:  0.7792, Adjusted R-squared:  0.7703 
## F-statistic: 87.61 on 6 and 149 DF,  p-value: < 2.2e-16

relacion entre esperanza y percepcioncorrupcion

# Pearson correlation between esperanza and percepcioncorrupcion.
# `exact` is only consulted for Kendall's tau and Spearman's rho, so it has no
# effect on this Pearson test; it is spelled FALSE because `F` is an ordinary
# variable that can be reassigned.
# NOTE(review): shapiro.test() rejects normality for both variables in this
# report, so method = "spearman" may be the better choice. Confirm before
# switching, because the recorded output below would change.
h1 <- ~ esperanza + percepcioncorrupcion
cor.test(h1, data = indice, method = "pearson", exact = FALSE)

## 
##  Pearson's product-moment correlation
## 
## data:  esperanza and percepcioncorrupcion
## t = 4.0953, df = 154, p-value = 6.791e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1643339 0.4484393
## sample estimates:
##      cor 
## 0.313382

La relacion es de 0.31 segun el coeficiente r de Pearson (correlacion positiva y debil, estadisticamente significativa)

en grafico:

library(ggpubr)

## Loading required package: ggplot2

## Loading required package: magrittr

# Scatter plot of esperanza against percepcioncorrupcion with a regression
# line, its confidence band, and the Pearson coefficient shown on the plot.
ggscatter(
  indice,
  x = "esperanza",
  y = "percepcioncorrupcion",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)

correlacion entre PBI y generosidad

# Shapiro-Wilk normality test on pbi, repeated here before the pbi/generosidad
# correlation (same call as earlier in the report).
# NOTE(review): generosidad is not tested for normality in the visible script;
# consider adding shapiro.test(indice$generosidad).
shapiro.test(indice$pbi)

## 
##  Shapiro-Wilk normality test
## 
## data:  indice$pbi
## W = 0.97748, p-value = 0.01176

# Pearson correlation between pbi and generosidad.
cor.test(
  x = indice$pbi,
  y = indice$generosidad,
  method = "pearson"
)

## 
##  Pearson's product-moment correlation
## 
## data:  indice$pbi and indice$generosidad
## t = -0.060844, df = 154, p-value = 0.9516
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1619189  0.1523552
## sample estimates:
##          cor 
## -0.004902909

# Scatter plot of pbi against generosidad with a regression line, its
# confidence band, and the Pearson coefficient shown on the plot.
library(ggpubr)
ggscatter(
  indice,
  x = "pbi",
  y = "generosidad",
  add = "reg.line",
  add.params = list(color = "blue", fill = "lightgray"),
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "pearson"
)

NO hay correlacion

tomando a esperanza como dep

# Shapiro-Wilk normality test on esperanza (p < 0.05 rejects normality).
shapiro.test(indice$esperanza)

## 
##  Shapiro-Wilk normality test
## 
## data:  indice$esperanza
## W = 0.95408, p-value = 5.151e-05

es NO NORMAL

# Linear model with esperanza as the response and the other six indicators as
# predictors.
espe <- lm(
  esperanza ~ puntuacion + pbi + apoyosoc + libertad + generosidad +
    percepcioncorrupcion,
  data = indice
)

summary(espe)

## 
## Call:
## lm(formula = esperanza ~ puntuacion + pbi + apoyosoc + libertad + 
##     generosidad + percepcioncorrupcion, data = indice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45271 -0.05889  0.00928  0.07845  0.25994 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.09737    0.06004  -1.622   0.1070    
## puntuacion            0.04902    0.01952   2.512   0.0131 *  
## pbi                   0.37513    0.04592   8.168 1.23e-13 ***
## apoyosoc              0.08534    0.05246   1.627   0.1059    
## libertad             -0.02142    0.08199  -0.261   0.7943    
## generosidad          -0.02816    0.11493  -0.245   0.8068    
## percepcioncorrupcion  0.07642    0.12754   0.599   0.5500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1273 on 149 degrees of freedom
## Multiple R-squared:  0.7479, Adjusted R-squared:  0.7378 
## F-statistic: 73.68 on 6 and 149 DF,  p-value: < 2.2e-16

Dos variables tienen efecto significativo: puntuacion y pbi

tomando percepcion corrupcion como DEP

# Shapiro-Wilk normality test on percepcioncorrupcion (p < 0.05 rejects
# normality).
shapiro.test(indice$percepcioncorrupcion)

## 
##  Shapiro-Wilk normality test
## 
## data:  indice$percepcioncorrupcion
## W = 0.81431, p-value = 8.486e-13

es NO NORMAL

# Linear model with percepcioncorrupcion as the response and the other six
# indicators as predictors.
percepcion <- lm(
  percepcioncorrupcion ~ puntuacion + pbi + apoyosoc + libertad +
    generosidad + esperanza,
  data = indice
)

summary(percepcion)

## 
## Call:
## lm(formula = percepcioncorrupcion ~ puntuacion + pbi + apoyosoc + 
##     libertad + generosidad + esperanza, data = indice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17247 -0.05901 -0.01474  0.04397  0.33498 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.08207    0.03828  -2.144  0.03364 * 
## puntuacion   0.02049    0.01267   1.617  0.10801   
## pbi          0.01869    0.03542   0.528  0.59848   
## apoyosoc    -0.05800    0.03362  -1.725  0.08655 . 
## libertad     0.16812    0.05078   3.311  0.00117 **
## generosidad  0.23753    0.07113   3.339  0.00106 **
## esperanza    0.03145    0.05250   0.599  0.54998   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08168 on 149 degrees of freedom
## Multiple R-squared:  0.3217, Adjusted R-squared:  0.2944 
## F-statistic: 11.78 on 6 and 149 DF,  p-value: 8.687e-11

PARTE 2

# Published Google Sheets export (CSV) with one row per province.
link2 <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vTFLhCO2eqAth81eQsaT03RQx32n9GRxx6ixAR2trHCt4rWR6QIHk-Ig-b9VmXpBg/pub?output=csv"

# Keep text columns as character on load. FALSE is spelled out because `F` is
# an ordinary variable that can be reassigned.
ide <- read.csv(link2, stringsAsFactors = FALSE)

str(ide)

## 'data.frame':    195 obs. of  13 variables:
##  $ regionUbigeo   : int  10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
##  $ provinciaUbigeo: int  10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
##  $ PROVINCIA      : chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ IDE            : num  0.774 0.662 0.632 0.46 0.605 ...
##  $ identidad      : num  98.6 94.6 97.5 86.2 96.2 ...
##  $ salud          : num  25.45 14.61 9.01 8.56 12.42 ...
##  $ educacion      : num  91.5 79.8 76.4 52.2 74.7 ...
##  $ saneamiento    : num  70.3 64.5 54.8 37.7 43.3 ...
##  $ electrificacion: num  84 67.9 72.2 39.5 67.4 ...
##  $ poblacion      : int  54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
##  $ costa          : chr  "NO" "NO" "NO" "NO" ...
##  $ capital        : chr  "SI" "NO" "NO" "NO" ...
##  $ tamano         : chr  "Pequena" "Pequena" "Muy pequena" "Pequena" ...

# Convert the three categorical columns (costa, capital, tamano; positions 11
# to 13 in the str() output) to factors. They are selected by name so the
# conversion keeps working if the sheet's column order changes.
ide[c("costa", "capital", "tamano")] <- lapply(
  ide[c("costa", "capital", "tamano")],
  as.factor
)

# Keep only the rows that have no missing value in any column.
ide <- ide[complete.cases(ide), ]

summary(ide)

##   regionUbigeo    provinciaUbigeo   PROVINCIA              IDE        
##  Min.   : 10000   Min.   : 10100   Length:195         Min.   :0.4245  
##  1st Qu.: 50000   1st Qu.: 50750   Class :character   1st Qu.:0.5806  
##  Median :100000   Median :101100   Mode  :character   Median :0.6367  
##  Mean   :113795   Mean   :114358                      Mean   :0.6519  
##  3rd Qu.:170000   3rd Qu.:170250                      3rd Qu.:0.7164  
##  Max.   :250000   Max.   :250400                      Max.   :0.9104  
##    identidad         salud          educacion      saneamiento     
##  Min.   :81.97   Min.   : 2.598   Min.   :44.03   Min.   : 0.2351  
##  1st Qu.:96.54   1st Qu.: 6.485   1st Qu.:74.06   1st Qu.:40.9610  
##  Median :97.90   Median :10.380   Median :82.49   Median :54.2229  
##  Mean   :97.30   Mean   :11.919   Mean   :80.42   Mean   :56.2641  
##  3rd Qu.:98.87   3rd Qu.:14.744   3rd Qu.:89.02   3rd Qu.:72.4629  
##  Max.   :99.50   Max.   :44.741   Max.   :99.50   Max.   :99.5000  
##  electrificacion   poblacion       costa    capital          tamano  
##  Min.   :33.83   Min.   :   4251   NO:163   NO:170   Grande     :15  
##  1st Qu.:61.31   1st Qu.:  32188   SI: 32   SI: 25   Mediana    :46  
##  Median :74.29   Median :  63039                     Muy grande : 1  
##  Mean   :72.11   Mean   : 154543                     Muy pequena:69  
##  3rd Qu.:85.02   3rd Qu.: 121804                     Pequena    :64  
##  Max.   :99.50   Max.   :8481415

# List the factor levels of the three categorical columns; the `##` lines are
# the recorded console output.
levels(ide$costa)

## [1] "NO" "SI"

levels(ide$capital)

## [1] "NO" "SI"

levels(ide$tamano)

## [1] "Grande"      "Mediana"     "Muy grande"  "Muy pequena" "Pequena"

# Shapiro-Wilk normality test on IDE (p < 0.05 rejects normality).
shapiro.test(ide$IDE)

## 
##  Shapiro-Wilk normality test
## 
## data:  ide$IDE
## W = 0.98349, p-value = 0.02167

NO NORMAL

# Skewness of IDE via DescTools::Skew(); a positive value indicates a longer
# right tail.
library(DescTools)
Skew(ide$IDE)

## [1] 0.3486982

Asimetria positiva

tomando a identidad como DEP

# Shapiro-Wilk normality test on identidad (p < 0.05 rejects normality).
shapiro.test(ide$identidad)

## 
##  Shapiro-Wilk normality test
## 
## data:  ide$identidad
## W = 0.72664, p-value < 2.2e-16

no normal

# List the column names available as model terms.
names(ide)

##  [1] "regionUbigeo"    "provinciaUbigeo" "PROVINCIA"      
##  [4] "IDE"             "identidad"       "salud"          
##  [7] "educacion"       "saneamiento"     "electrificacion"
## [10] "poblacion"       "costa"           "capital"        
## [13] "tamano"

# Linear model with identidad as the response, explained by the other service
# indicators, population, and the three categorical factors.
ident <- lm(
  identidad ~ salud + educacion + saneamiento + electrificacion +
    poblacion + costa + capital + tamano,
  data = ide
)

summary(ident)

## 
## Call:
## lm(formula = identidad ~ salud + educacion + saneamiento + electrificacion + 
##     poblacion + costa + capital + tamano, data = ide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.1774  -0.8996   0.2286   1.1466   4.0155 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        8.530e+01  1.642e+00  51.935  < 2e-16 ***
## salud             -3.668e-03  2.897e-02  -0.127  0.89938    
## educacion          1.190e-01  1.689e-02   7.045 3.64e-11 ***
## saneamiento        2.971e-02  1.067e-02   2.785  0.00592 ** 
## electrificacion   -8.407e-03  1.470e-02  -0.572  0.56819    
## poblacion          9.183e-07  1.937e-06   0.474  0.63607    
## costaSI            1.979e-01  4.812e-01   0.411  0.68139    
## capitalSI          2.562e-01  6.391e-01   0.401  0.68898    
## tamanoMediana      1.178e+00  9.838e-01   1.198  0.23259    
## tamanoMuy grande  -6.758e+00  1.543e+01  -0.438  0.66192    
## tamanoMuy pequena  1.767e+00  1.185e+00   1.491  0.13777    
## tamanoPequena      1.030e+00  1.120e+00   0.920  0.35884    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.905 on 183 degrees of freedom
## Multiple R-squared:  0.4343, Adjusted R-squared:  0.4003 
## F-statistic: 12.77 on 11 and 183 DF,  p-value: < 2.2e-16

Ninguna variable con efecto inverso tiene efecto significativo

Relacion saneamiento y costa

# Shapiro-Wilk normality test on saneamiento (p >= 0.05 does not reject
# normality).
shapiro.test(ide$saneamiento)

## 
##  Shapiro-Wilk normality test
## 
## data:  ide$saneamiento
## W = 0.98626, p-value = 0.05485

ES NORMAL (p = 0.05485 > 0.05, no se rechaza la normalidad), por lo tanto es parametrico

# Median saneamiento for each level of costa.
# NOTE(review): the accompanying notes treat saneamiento as parametric and
# choose a t test, for which group means would be the matching summary;
# confirm whether the median is intended.
h2 <- saneamiento ~ costa
aggregate(h2, data = ide, FUN = median)

##   costa saneamiento
## 1    NO     51.3358
## 2    SI     84.3393

USAMOS PRUEBA T por ser PARAMETRICO

tomando salud como DEP

# Shapiro-Wilk normality test on salud (p < 0.05 rejects normality).
shapiro.test(ide$salud)

## 
##  Shapiro-Wilk normality test
## 
## data:  ide$salud
## W = 0.86893, p-value = 5.948e-12

no normal

# Linear model with salud as the response, explained by the other service
# indicators, population, and the three categorical factors.
sal <- lm(
  salud ~ identidad + educacion + saneamiento + electrificacion +
    poblacion + costa + capital + tamano,
  data = ide
)

summary(sal)

## 
## Call:
## lm(formula = salud ~ identidad + educacion + saneamiento + electrificacion + 
##     poblacion + costa + capital + tamano, data = ide)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.3402  -3.2121  -0.2517   2.6801  19.4999 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        4.176e+00  1.662e+01   0.251   0.8019    
## identidad         -2.388e-02  1.886e-01  -0.127   0.8994    
## educacion          1.140e-02  4.858e-02   0.235   0.8147    
## saneamiento        1.351e-01  2.594e-02   5.209 5.07e-07 ***
## electrificacion    2.556e-02  3.751e-02   0.681   0.4965    
## poblacion          1.399e-06  4.946e-06   0.283   0.7776    
## costaSI            2.171e+00  1.218e+00   1.783   0.0763 .  
## capitalSI          7.036e+00  1.546e+00   4.551 9.73e-06 ***
## tamanoMediana     -3.462e+00  2.507e+00  -1.381   0.1690    
## tamanoMuy grande  -1.258e+00  3.940e+01  -0.032   0.9746    
## tamanoMuy pequena -3.255e-01  3.043e+00  -0.107   0.9149    
## tamanoPequena     -2.534e+00  2.859e+00  -0.887   0.3765    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.86 on 183 degrees of freedom
## Multiple R-squared:  0.5837, Adjusted R-squared:  0.5587 
## F-statistic: 23.33 on 11 and 183 DF,  p-value: < 2.2e-16

Las provincias que son capital de region tendrian mejor nivel de salud