susti.utf8.md

library(htmltab)
# Scrape the table from the Spanish Wikipedia page given in `link`.
# `path` is the XPath that htmltab() uses to locate the table node.
link <- "https://es.wikipedia.org/wiki/%C3%8Dndice_global_de_felicidad"
path <- "//*/div/table"
susti <- htmltab(which = path, doc = link)

# Inspect the column names and types of the scraped data frame.
str(susti)

## 'data.frame':    156 obs. of  9 variables:
##  $ №                                     : chr  "1" "2" "3" "4" ...
##  $ País                                  : chr  "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
##  $ Puntuación                            : chr  "7.633" "7.560" "7.555" "7.495" ...
##  $ PIB per cápita                        : chr  "1.305" "1.372" "1.351" "1.343" ...
##  $ Apoyo social                          : chr  "1.592" "1.595" "1.590" "1.644" ...
##  $ Esperanza de años de vida saludable   : chr  "0.874" "0.870" "0.868" "0.914" ...
##  $ Libertad para tomar decisiones vitales: chr  "0.681" "0.685" "0.683" "0.677" ...
##  $ Generosidad                           : chr  "0.192" "0.285" "0.284" "0.353" ...
##  $ Percepción de la corrupción           : chr  "0.393" "0.410" "0.408" "0.138" ...

# Columns 3 through 9 were read as character; coerce each one to numeric.
susti[3:9] <- lapply(susti[3:9], as.numeric)

Veo el resultado de la conversión:

# Re-inspect the structure to check the column types after the conversion.
str(susti)

## 'data.frame':    156 obs. of  9 variables:
##  $ №                                     : chr  "1" "2" "3" "4" ...
##  $ País                                  : chr  "Finlandia" "Noruega" "Dinamarca" "Islandia" ...
##  $ Puntuación                            : num  7.63 7.56 7.55 7.5 7.49 ...
##  $ PIB per cápita                        : num  1.3 1.37 1.35 1.34 1.42 ...
##  $ Apoyo social                          : num  1.59 1.59 1.59 1.64 1.55 ...
##  $ Esperanza de años de vida saludable   : num  0.874 0.87 0.868 0.914 0.927 0.878 0.896 0.876 0.913 0.91 ...
##  $ Libertad para tomar decisiones vitales: num  0.681 0.685 0.683 0.677 0.66 0.638 0.653 0.669 0.659 0.647 ...
##  $ Generosidad                           : num  0.192 0.285 0.284 0.353 0.256 0.333 0.321 0.365 0.285 0.361 ...
##  $ Percepción de la corrupción           : num  0.393 0.41 0.408 0.138 0.357 0.295 0.291 0.389 0.383 0.302 ...

ELIMINO ESPACIOS (renombro las columnas con nombres sin espacios ni tildes)

# Short replacement names without spaces or accents, listed in the same
# order as the nine columns of `susti`.
nuevonom <- c(
  "numero", "pais", "puntuacion", "pbiperca", "apoyo",
  "esperanzadevida", "libertad", "generosidad", "percepcion"
)

names(susti) <- nuevonom

# Print the names to verify the renaming.
names(susti)

## [1] "numero"          "pais"            "puntuacion"      "pbiperca"       
## [5] "apoyo"           "esperanzadevida" "libertad"        "generosidad"    
## [9] "percepcion"

PREGUNTA 1: PERCEPCIÓN DE CORRUPCIÓN COMO DEPENDIENTE

# Linear model with percepcion as the response and the other six numeric
# indicators as predictors.
hip1 <- lm(
  percepcion ~ puntuacion + pbiperca + apoyo + esperanzadevida +
    libertad + generosidad,
  data = susti
)
summary(hip1)

## 
## Call:
## lm(formula = percepcion ~ puntuacion + pbiperca + apoyo + esperanzadevida + 
##     libertad + generosidad, data = susti)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17247 -0.05901 -0.01474  0.04397  0.33498 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -0.08207    0.03828  -2.144  0.03364 * 
## puntuacion       0.02049    0.01267   1.617  0.10801   
## pbiperca         0.01869    0.03542   0.528  0.59848   
## apoyo           -0.05800    0.03362  -1.725  0.08655 . 
## esperanzadevida  0.03145    0.05250   0.599  0.54998   
## libertad         0.16812    0.05078   3.311  0.00117 **
## generosidad      0.23753    0.07113   3.339  0.00106 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08168 on 149 degrees of freedom
## Multiple R-squared:  0.3217, Adjusted R-squared:  0.2944 
## F-statistic: 11.78 on 6 and 149 DF,  p-value: 8.687e-11

PREGUNTA 2: Si tomamos de nuestras variables de interés a esperanza de años de vida saludable como la dependiente, y a las demás como las independientes, se puede concluir que:

# Linear model with esperanzadevida as the response and the other six
# numeric indicators as predictors.
hip2 <- lm(
  esperanzadevida ~ puntuacion + pbiperca + apoyo + libertad +
    generosidad + percepcion,
  data = susti
)
summary(hip2)

## 
## Call:
## lm(formula = esperanzadevida ~ puntuacion + pbiperca + apoyo + 
##     libertad + generosidad + percepcion, data = susti)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45271 -0.05889  0.00928  0.07845  0.25994 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.09737    0.06004  -1.622   0.1070    
## puntuacion   0.04902    0.01952   2.512   0.0131 *  
## pbiperca     0.37513    0.04592   8.168 1.23e-13 ***
## apoyo        0.08534    0.05246   1.627   0.1059    
## libertad    -0.02142    0.08199  -0.261   0.7943    
## generosidad -0.02816    0.11493  -0.245   0.8068    
## percepcion   0.07642    0.12754   0.599   0.5500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1273 on 149 degrees of freedom
## Multiple R-squared:  0.7479, Adjusted R-squared:  0.7378 
## F-statistic: 73.68 on 6 and 149 DF,  p-value: < 2.2e-16

PREGUNTA 3: CORRELACIÓN ENTRE PIB Y GENEROSIDAD

NUMÉRICA - NUMÉRICA

library(ggpubr)

## Loading required package: ggplot2

## Loading required package: magrittr

# Scatter plot of pbiperca (x) against generosidad (y), annotated with the
# Spearman correlation coefficient.
pregun3 <- ggscatter(
  susti,
  x = "pbiperca",
  y = "generosidad",
  cor.coef = TRUE,
  cor.method = "spearman"
)

# Display the plot.
pregun3

2da parte

# Read the second data set from a published Google Sheets CSV export.
# Note that `link` is reused here, so it no longer holds the Wikipedia URL.
link <- "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ91JZkU1x4kSYl-PAGuZKQAcXApVDf4e8Dx10BwQhrfyter5PZ2RcNSpyyI-Ri4hLcq1kBR30DMfdk/pub?gid=305809466&single=true&output=csv"
# Use the constant FALSE rather than F: F is an ordinary variable that can be
# reassigned, which would silently turn strings into factors.
susti2 <- read.csv(link, stringsAsFactors = FALSE)

# Inspect the column names and types of the imported data frame.
str(susti2)

## 'data.frame':    195 obs. of  13 variables:
##  $ regionUbigeo   : int  10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
##  $ provinciaUbigeo: int  10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
##  $ PROVINCIA      : chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ IDE            : num  0.774 0.662 0.632 0.46 0.605 ...
##  $ identidad      : num  98.6 94.6 97.5 86.2 96.2 ...
##  $ salud          : num  25.45 14.61 9.01 8.56 12.42 ...
##  $ educacion      : num  91.5 79.8 76.4 52.2 74.7 ...
##  $ saneamiento    : num  70.3 64.5 54.8 37.7 43.3 ...
##  $ electrificacion: num  84 67.9 72.2 39.5 67.4 ...
##  $ poblacion      : int  54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
##  $ costa          : chr  "NO" "NO" "NO" "NO" ...
##  $ capital        : chr  "SI" "NO" "NO" "NO" ...
##  $ tamano         : chr  "Pequena" "Pequena" "Muy pequena" "Pequena" ...

PREGUNTA 4: Sin hacer regresión, ¿qué podríamos concluir de la relación entre saneamiento y costa (ser provincia en la costa)?

# Shapiro-Wilk normality test on the whole saneamiento column
# (not yet split by costa).
shapiro.test(susti2$saneamiento)

## 
##  Shapiro-Wilk normality test
## 
## data:  susti2$saneamiento
## W = 0.98626, p-value = 0.05485

p-value = 0.05485 (mayor que 0.05: no se rechaza la normalidad de saneamiento)

# Formula reused by the steps below: saneamiento as the outcome, grouped
# by costa.
f1 <- saneamiento ~ costa

# Mean saneamiento within each costa group.
aggregate(f1, data = susti2, FUN = mean)

##   costa saneamiento
## 1    NO    50.97556
## 2    SI    83.20243

library(ggpubr)
# Q-Q plot of saneamiento, with one panel per value of costa.
ggqqplot(data = susti2, x = "saneamiento") +
  facet_grid(. ~ costa)

# Run the Shapiro-Wilk test on `x` and return a length-two vector:
# the test statistic followed by the p-value.
normalidadTest <- function(x) {
  prueba <- shapiro.test(x)
  c(prueba$statistic, prueba$p.value)
}
# Apply normalidadTest to saneamiento separately within each costa group.
resultado <- aggregate(f1, data = susti2, FUN = normalidadTest)
library(knitr)

# The second column of `resultado` holds the values returned by
# normalidadTest for each group; turn it into a two-column data frame.
shapiroTest <- setNames(
  as.data.frame(resultado[, 2]),
  c("Statistic", "Probabilidad")
)

# Render the group labels next to the test results as a table.
kable(cbind(resultado[1], shapiroTest))

costa	Statistic	Probabilidad
NO	0.9949525	0.8529651
SI	0.9580179	0.2422331

# Two-sample t-test comparing mean saneamiento between the costa groups.
t.test(f1, data = susti2)

## 
##  Welch Two Sample t-test
## 
## data:  saneamiento by costa
## t = -14.131, df = 73.273, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -36.77164 -27.68210
## sample estimates:
## mean in group NO mean in group SI 
##         50.97556         83.20243

Si tomamos de nuestras variables de interés a salud como la dependiente, y a las demás como las independientes, se puede concluir que:

# Review the available columns of susti2 before building the regressions.
str(susti2)

## 'data.frame':    195 obs. of  13 variables:
##  $ regionUbigeo   : int  10000 10000 10000 10000 10000 10000 10000 20000 20000 20000 ...
##  $ provinciaUbigeo: int  10100 10200 10300 10400 10500 10600 10700 20100 20200 20300 ...
##  $ PROVINCIA      : chr  "CHACHAPOYAS" "BAGUA" "BONGARA" "CONDORCANQUI" ...
##  $ IDE            : num  0.774 0.662 0.632 0.46 0.605 ...
##  $ identidad      : num  98.6 94.6 97.5 86.2 96.2 ...
##  $ salud          : num  25.45 14.61 9.01 8.56 12.42 ...
##  $ educacion      : num  91.5 79.8 76.4 52.2 74.7 ...
##  $ saneamiento    : num  70.3 64.5 54.8 37.7 43.3 ...
##  $ electrificacion: num  84 67.9 72.2 39.5 67.4 ...
##  $ poblacion      : int  54783 77438 32317 51802 52185 30236 118747 161003 7974 16879 ...
##  $ costa          : chr  "NO" "NO" "NO" "NO" ...
##  $ capital        : chr  "SI" "NO" "NO" "NO" ...
##  $ tamano         : chr  "Pequena" "Pequena" "Muy pequena" "Pequena" ...

# Linear model with salud as the response and IDE, the other four indicators,
# poblacion, costa, capital and tamano as predictors.
# NOTE(review): the summary below reports R-squared = 1 with near-exact
# coefficients on IDE and the four other indicators, which suggests IDE is
# computed from these same columns. Confirm, and consider dropping IDE.
ja1 <- lm(
  salud ~ IDE + identidad + educacion + saneamiento + electrificacion +
    poblacion + costa + capital + tamano,
  data = susti2
)
summary(ja1)

## 
## Call:
## lm(formula = salud ~ IDE + identidad + educacion + saneamiento + 
##     electrificacion + poblacion + costa + capital + tamano, data = susti2)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0188775 -0.0074398  0.0006328  0.0075417  0.0143921 
## 
## Coefficients:
##                     Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)        9.362e-04  3.046e-02     0.031    0.976    
## IDE                3.000e+02  4.062e-02  7384.174   <2e-16 ***
## identidad         -5.998e-01  3.542e-04 -1693.213   <2e-16 ***
## educacion         -5.999e-01  1.216e-04 -4935.618   <2e-16 ***
## saneamiento       -5.998e-01  1.103e-04 -5438.806   <2e-16 ***
## electrificacion   -5.999e-01  1.091e-04 -5500.310   <2e-16 ***
## poblacion         -1.264e-08  9.062e-09    -1.395    0.165    
## costaSI           -8.621e-04  2.250e-03    -0.383    0.702    
## capitalSI          1.296e-03  2.989e-03     0.434    0.665    
## tamanoMediana     -3.901e-03  4.617e-03    -0.845    0.399    
## tamanoMuy grande   9.632e-02  7.217e-02     1.335    0.184    
## tamanoMuy pequena -8.177e-03  5.574e-03    -1.467    0.144    
## tamanoPequena     -6.747e-03  5.248e-03    -1.285    0.200    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.008904 on 182 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.091e+07 on 12 and 182 DF,  p-value: < 2.2e-16

Si tomamos de nuestras variables de interés a identidad como la dependiente, y a las demás como las independientes, se puede concluir que:

# Linear model with identidad as the response and IDE, the other four
# indicators, poblacion, costa, capital and tamano as predictors.
# NOTE(review): the summary below reports R-squared = 1 with near-exact
# coefficients on IDE and the four other indicators, which suggests IDE is
# computed from these same columns. Confirm, and consider dropping IDE.
ja2 <- lm(
  identidad ~ IDE + salud + educacion + saneamiento + electrificacion +
    poblacion + costa + capital + tamano,
  data = susti2
)
summary(ja2)

## 
## Call:
## lm(formula = identidad ~ IDE + salud + educacion + saneamiento + 
##     electrificacion + poblacion + costa + capital + tamano, data = susti2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.031454 -0.012457  0.001169  0.012581  0.023947 
## 
## Coefficients:
##                     Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)        6.690e-03  5.078e-02     0.132    0.895    
## IDE                5.000e+02  2.881e-01  1735.734   <2e-16 ***
## salud             -1.667e+00  9.846e-04 -1693.213   <2e-16 ***
## educacion         -1.000e+00  6.580e-04 -1519.858   <2e-16 ***
## saneamiento       -1.000e+00  5.990e-04 -1669.340   <2e-16 ***
## electrificacion   -1.000e+00  5.827e-04 -1716.268   <2e-16 ***
## poblacion         -2.102e-08  1.511e-08    -1.391    0.166    
## costaSI           -1.437e-03  3.752e-03    -0.383    0.702    
## capitalSI          2.138e-03  4.983e-03     0.429    0.668    
## tamanoMediana     -6.413e-03  7.698e-03    -0.833    0.406    
## tamanoMuy grande   1.602e-01  1.203e-01     1.331    0.185    
## tamanoMuy pequena -1.352e-02  9.294e-03    -1.455    0.147    
## tamanoPequena     -1.117e-02  8.750e-03    -1.277    0.203    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01484 on 182 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 4.438e+05 on 12 and 182 DF,  p-value: < 2.2e-16