TRABAJO FINAL DE ESTADÍSTICA

1. Unir archivos

1.1. Índice de libertades

# Page to scrape and XPath of the second table inside its content div.
Link2 <- "https://en.wikipedia.org/wiki/List_of_freedom_indices"
Path2 <- '//*[@id="mw-content-text"]/div[1]/table[2]'

# Download that table as a data frame and list the scraped column headers.
Ifreedom <- htmltab::htmltab(Link2, Path2)
names(Ifreedom)
## [1] "Country"                        "Freedom in the World 2020"     
## [3] "2020 Index of Economic Freedom" "2020 Press Freedom Index"      
## [5] "2019 Democracy Index"
# Swap the long scraped headers for short names, then inspect column types.
names(Ifreedom) <- c("country", "Freedom", "eco", "press", "demo")
str(Ifreedom)
## 'data.frame':    198 obs. of  5 variables:
##  $ country: chr  " Afghanistan" " Albania" " Algeria" " Andorra" ...
##  $ Freedom: chr  "not free" "partly free" "not free" "free" ...
##  $ eco    : chr  "mostly unfree" "moderately free" "repressed" "n/a" ...
##  $ press  : chr  "difficult situation" "noticeable problems" "difficult situation" "satisfactory situation" ...
##  $ demo   : chr  "authoritarian regime" "hybrid regime" "hybrid regime" "n/a" ...

1.2. Índice de felicidad

# Page to scrape and XPath of the table inside its content div.
Link3 <- "https://en.wikipedia.org/wiki/World_Happiness_Report"
Path3 <- '//*[@id="mw-content-text"]/div[1]/table'

# Download the table as a data frame and list the scraped column headers.
Ihapiness <- htmltab::htmltab(Link3, Path3)
names(Ihapiness)
## [1] "Overall rank"                 "Country or region"           
## [3] "Score"                        "GDP per capita"              
## [5] "Social support"               "Healthy life expectancy"     
## [7] "Freedom to make life choices" "Generosity"                  
## [9] "Perceptions of corruption"

Eliminamos columnas

# Discard the first and third columns, give the remaining seven short names,
# and inspect the resulting column types.
Ihapiness <- Ihapiness[, -c(1, 3)]
names(Ihapiness) <- c(
  "country", "gdp", "social", "life", "choices", "generosity", "perceptions"
)
str(Ihapiness)
## 'data.frame':    153 obs. of  7 variables:
##  $ country    : chr  " Finland" " Denmark" "  Switzerland" " Iceland" ...
##  $ gdp        : chr  "1.285" "1.327" "1.391" "1.327" ...
##  $ social     : chr  "1.500" "1.503" "1.472" "1.548" ...
##  $ life       : chr  "0.961" "0.979" "1.041" "1.001" ...
##  $ choices    : chr  "0.662" "0.665" "0.629" "0.662" ...
##  $ generosity : chr  "0.160" "0.243" "0.269" "0.362" ...
##  $ perceptions: chr  "0.478" "0.495" "0.408" "0.145" ...
# Columns 2 to 7 were scraped as text; convert each of them to numeric.
Ihapiness[, 2:7] <- lapply(Ihapiness[, 2:7], FUN = as.numeric)

1.3. Índice de democracia

# Page to scrape and XPath of the second table inside its content div.
Link1 <- "https://en.wikipedia.org/wiki/Democracy_Index"
Path1 <- '//*[@id="mw-content-text"]/div[1]/table[2]'

# Download that table as a data frame and list the scraped column headers.
Idemocracia <- htmltab::htmltab(Link1, Path1)
names(Idemocracia)
##  [1] "Rank >> Rank"                                                    
##  [2] "Country >> Country"                                              
##  [3] "Score >> Score"                                                  
##  [4] "Electoral processand pluralism >> Electoral processand pluralism"
##  [5] "Functio­ning ofgovern­ment >> Functio­ning ofgovern­ment"        
##  [6] "Politicalpartici­pation >> Politicalpartici­pation"              
##  [7] "Politicalculture >> Politicalculture"                            
##  [8] "Civilliberties >> Civilliberties"                                
##  [9] "Regimetype >> Regimetype"                                        
## [10] "Region >> Region"                                                
## [11] "Changes fromlast year >> Changes fromlast year"
# Discard columns 1, 3 and 11, give the remaining eight short names, and
# inspect the resulting column types.
Idemocracia <- Idemocracia[, -c(1, 3, 11)]
names(Idemocracia) <- c(
  "country", "elec", "funct", "partic", "cult", "civil", "regime", "region"
)
str(Idemocracia)
## 'data.frame':    167 obs. of  8 variables:
##  $ country: chr  " Norway" " Iceland" " Sweden" " New Zealand" ...
##  $ elec   : chr  "10.00" "10.00" "9.58" "10.00" ...
##  $ funct  : chr  "9.64" "9.29" "9.64" "9.29" ...
##  $ partic : chr  "10.00" "8.89" "8.33" "8.89" ...
##  $ cult   : chr  "10.00" "10.00" "10.00" "8.13" ...
##  $ civil  : chr  "9.71" "9.71" "9.41" "10.00" ...
##  $ regime : chr  "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
##  $ region : chr  "Western Europe" "Western Europe" "Western Europe" "Asia & Australasia" ...
# Columns 2 to 6 were scraped as text; convert each of them to numeric.
Idemocracia[, 2:6] <- lapply(Idemocracia[, 2:6], FUN = as.numeric)

1.4. Unimos las tres tablas

# Strip the padding that the scraped tables leave around country names (the
# str() output shows values such as " Finland" and "  Switzerland") so the
# join key is spelled identically in all three tables. "[\\h\\v]" also matches
# non-breaking spaces, which the default trimws() pattern would keep.
Ifreedom$country <- trimws(Ifreedom$country, whitespace = "[\\h\\v]")
Ihapiness$country <- trimws(Ihapiness$country, whitespace = "[\\h\\v]")
Idemocracia$country <- trimws(Idemocracia$country, whitespace = "[\\h\\v]")

# Inner joins on the explicit key: only countries present in every table
# remain in InTotal.
FreedomHapiness <- merge(Ifreedom, Ihapiness, by = "country")
InTotal <- merge(FreedomHapiness, Idemocracia, by = "country")

2. Exploración de la variable

2.1. Exploraremos la variable Libertades civiles (civil)

2.1.1. Exploración gráfica:

Histograma

library(ggplot2)

# Histogram of civil using 7 bins; printing the object draws it.
base1 <- ggplot(InTotal, aes(x = civil))
histNum <- base1 + geom_histogram(bins = 7)
histNum

Distribución asimétrica negativa

Boxplot

# Boxplot of civil, flipped so the box lies horizontally. The plot is kept in
# `box` because it is reused further down with custom axis breaks.
base2 <- ggplot(InTotal, aes(y = civil))
box <- base2 + geom_boxplot() + coord_flip()

box

Asimetría negativa

2.1.2. Exploración con estadígrafos

# Minimum, quartiles, mean and maximum of civil.
summary(InTotal[["civil"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.590   3.970   6.180   6.029   8.240  10.000
library(ggplot2)

# Use the summary statistics of civil, rounded to two decimals, as the axis
# breaks of the boxplot stored in `box`.
estadigrafos <- round(as.vector(summary(InTotal[["civil"]])), digits = 2)

box + scale_y_continuous(breaks = estadigrafos)

library(DescTools)
# conf.level is the coverage of the interval, so a 95% interval is 0.95.
# The previous value of 0.05 requested a 5% interval, which is why the bounds
# recorded below are so tight and do not even contain the point estimate.
Skew(InTotal$civil, conf.level = 0.95)
# NOTE(review): the figures below were produced with conf.level = 0.05 and
# must be regenerated by re-running the report.
##       skew     lwr.ci     upr.ci 
## -0.2781146 -0.2907496 -0.2783326

Distancia intercuartílica

# Interquartile range of civil (third quartile minus first quartile).
IQR(InTotal[["civil"]])
## [1] 4.27

3. Análisis bivariado

library(ggpubr)

# Scatter plot of civil against cult annotated with the Pearson coefficient.
p1 <- ggscatter(
  InTotal,
  x = "cult",
  y = "civil",
  cor.coef = TRUE,
  cor.method = "pearson"
)

p1

# Same scatter plot annotated with the Spearman coefficient instead.
s4 <- ggscatter(
  InTotal,
  x = "cult",
  y = "civil",
  cor.coef = TRUE,
  cor.method = "spearman"
)
s4

4. Regresión

4.1. Análisis de la dependiente

  1. Verificar normalidad:
# Shapiro-Wilk test of civil; a small p-value rejects normality.
shapiro.test(InTotal$civil)
## 
##  Shapiro-Wilk normality test
## 
## data:  InTotal$civil
## W = 0.95366, p-value = 0.0001005
  2. Verificar asimetría y si hay atípicos:
library(DescTools)

# Skewness point estimate only; no confidence interval is requested here.
Skew(InTotal[["civil"]])
## [1] -0.2781146

Histograma

library(ggplot2)

# Histogram of civil with a finer resolution of 20 bins.
base <- ggplot(data = InTotal, mapping = aes(x = civil))
base + geom_histogram(bins = 20)

Boxplot

# Horizontal boxplot of civil, used to look for outliers.
base <- ggplot(data = InTotal, mapping = aes(y = civil))
base + geom_boxplot() + coord_flip()

4.2. Análisis bivariado

Ha: Las libertades civiles tienen relación directa con la participación política

# Median of civil for every distinct value of partic.
H1 <- civil ~ partic
aggregate(H1, data = InTotal, FUN = median)
##    partic civil
## 1    1.11 2.350
## 2    1.67 1.180
## 3    2.22 1.470
## 4    2.78 2.940
## 5    3.33 3.240
## 6    3.89 3.820
## 7    4.44 5.000
## 8    5.00 5.590
## 9    5.56 7.060
## 10   6.11 7.350
## 11   6.67 7.940
## 12   7.22 6.620
## 13   7.78 8.675
## 14   8.33 9.120
## 15   8.89 9.710
## 16  10.00 9.710
# One box per observed participation score. The grouping is explicit because
# partic is numeric: without it ggplot2 pools every country into a single box
# and warns about the continuous x aesthetic.
base <- ggplot(data = InTotal, aes(x = partic, y = civil, group = partic))
base + geom_boxplot() + coord_flip()

Asimetría negativa

Las libertades civiles tienen relación directa con la cultura política

# Median of civil for every distinct value of cult.
H2 <- civil ~ cult
aggregate(H2, data = InTotal, FUN = median)
##     cult civil
## 1   1.88 2.350
## 2   2.50 3.970
## 3   3.13 4.410
## 4   3.75 4.410
## 5   4.38 4.855
## 6   5.00 3.820
## 7   5.63 5.590
## 8   6.25 6.470
## 9   6.88 8.530
## 10  7.50 9.120
## 11  8.13 8.970
## 12  8.75 9.710
## 13  9.38 9.120
## 14 10.00 9.710
# One box per observed political-culture score. The grouping is explicit
# because cult is numeric: without it ggplot2 pools every country into a
# single box and warns about the continuous x aesthetic.
base <- ggplot(data = InTotal, aes(x = cult, y = civil, group = cult))
base + geom_boxplot() + coord_flip()

Asimetría negativa

Por lo tanto

1. Cuando aumenta la participación política de un país, aumentan sus libertades civiles (relación directa). Este efecto es significativo al 0.001.

2. Cuando aumenta la cultura política de un país, aumentan sus libertades civiles (relación directa). Este efecto es significativo al 0.001.