Vaya a este link: https://www.cia.gov/library/publications/resources/the-world-factbook/docs/rankorderguide.html
De ahi seleccione ver los datos sobre numero de usuarios de telefono celular por país.
library(htmltab) #instalemos primero
url3 = "https://www.cia.gov/library/publications/resources/the-world-factbook/fields/197rank.html"
celuX = htmltab(doc = url3,
which ='//*[@id="rankOrder"]',
encoding = "UTF-8")
## No encoding supplied: defaulting to UTF-8.
celu=celuX
write.csv(celuX,"celuX.csv",row.names = F)
celu$Rank=NULL
str(celu)
## 'data.frame': 216 obs. of 3 variables:
## $ Country : chr "China" "India" "Indonesia" "United States" ...
## $ Telephones - mobile cellular: chr "1,474,097,000" "1,168,902,277" "458,923,202" "395,881,000" ...
## $ Date of Information : chr "2017 est." "2017 est." "2017 est." "2017 est." ...
names(celu)= gsub(" |-","",names(celu))
library(stringr)
celu$DateofInformation=as.numeric(str_extract(celu$DateofInformation, "\\d+"))
celu=celu[!is.na(celu$DateofInformation) & celu$DateofInformation==2017,]
celu$Telephonesmobilecellular = gsub("\\,", "", celu$Telephonesmobilecellular)
celu$Telephonesmobilecellular = as.numeric(celu$Telephonesmobilecellular)
summary(celu$Telephonesmobilecellular) #verificamos
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.311e+04 2.443e+06 8.730e+06 4.481e+07 2.864e+07 1.474e+09
Descargue de wikipedia la data de: https://en.wikipedia.org/wiki/World_Happiness_Report. Esa wikipage tiene una tabla donde está el “score” de happiness, así como otros indicadores (esos no son sus componentes).
link1="https://en.wikipedia.org/wiki/World_Happiness_Report"
feliX<- htmltab(doc =link1,
which ='//*[@id="mw-content-text"]/div/table',encoding = "UTF-8")
feli=feliX
write.csv(feliX,"feliX.csv",row.names = F)
names(feli)=gsub(" |\\/","_", names(feli))
feli$Overall_Rank=NULL
str(feli)
## 'data.frame': 156 obs. of 8 variables:
## $ Country_Region : chr " Finland" " Norway" " Denmark" " Iceland" ...
## $ Score : chr "7.632" "7.594" "7.555" "7.495" ...
## $ GDP_per_capita : chr "1.305" "1.456" "1.351" "1.343" ...
## $ Social_support : chr "1.592" "1.582" "1.590" "1.644" ...
## $ Healthy_life_expectancy : chr "0.874" "0.861" "0.868" "0.914" ...
## $ Freedom_to_make_life_choices: chr "0.681" "0.686" "0.683" "0.677" ...
## $ Generosity : chr "0.192" "0.286" "0.284" "0.353" ...
## $ Perceptions_of_corruption : chr "0.393" "0.340" "0.408" "0.138" ...
names(feli)[2]="happy"
#feli=feli[complete.cases(feli),]
feli[,c(2:8)]=lapply(feli[,c(2:8)],as.numeric)
## Warning in lapply(feli[, c(2:8)], as.numeric): NAs introduced by coercion
summary(feli[,c(2:8)])
## happy GDP_per_capita Social_support Healthy_life_expectancy
## Min. :2.905 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:4.454 1st Qu.:0.6162 1st Qu.:1.067 1st Qu.:0.4223
## Median :5.378 Median :0.9495 Median :1.255 Median :0.6440
## Mean :5.376 Mean :0.8914 Mean :1.213 Mean :0.5973
## 3rd Qu.:6.168 3rd Qu.:1.1978 3rd Qu.:1.463 3rd Qu.:0.7772
## Max. :7.632 Max. :2.0960 Max. :1.644 Max. :1.0300
##
## Freedom_to_make_life_choices Generosity Perceptions_of_corruption
## Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.3560 1st Qu.:0.1095 1st Qu.:0.051
## Median :0.4870 Median :0.1740 Median :0.082
## Mean :0.4545 Mean :0.1809 Mean :0.112
## 3rd Qu.:0.5785 3rd Qu.:0.2390 3rd Qu.:0.137
## Max. :0.7240 Max. :0.5980 Max. :0.457
## NA's :1
Descargue de wikipedia la data de: https://en.wikipedia.org/wiki/Democracy_Index. Esa wikipage tiene una tabla donde está el “score” de democracia, así como otros indicadores (esos son las dimensiones que se utilizan para calcular el score).
link2 = "https://en.wikipedia.org/wiki/Democracy_Index"
demoX<- htmltab(doc =link2,
which ='//*[@id="mw-content-text"]/div/table[2]/tbody',encoding = "UTF-8")
demo=demoX
write.csv(demoX,"demoX.csv",row.names = F)
library(tidyr)
oldNames=as.data.frame(names(demo))
names(demo)=as.vector(unlist(separate(oldNames,'names(demo)','newNames',sep=" ")))
## Warning: Expected 1 pieces. Additional pieces discarded in 9 rows [1, 2, 3,
## 4, 5, 6, 7, 8, 9].
demo$Rank=NULL
str(demo)
## 'data.frame': 167 obs. of 8 variables:
## $ Country : chr " Norway" " Iceland" " Sweden" " New Zealand" ...
## $ Score : chr "9.87" "9.58" "9.39" "9.26" ...
## $ Electoral : chr "10.00" "10.00" "9.58" "10.00" ...
## $ Functioning : chr "9.64" "9.29" "9.64" "9.29" ...
## $ Politicalparticipation: chr "10.00" "8.89" "8.33" "8.89" ...
## $ Politicalculture : chr "10.00" "10.00" "10.00" "8.13" ...
## $ Civilliberties : chr "9.71" "9.71" "9.41" "10.00" ...
## $ Regimetype : chr "Full democracy" "Full democracy" "Full democracy" "Full democracy" ...
# lo ejecuto:
demo[,c(2:7)]=lapply(demo[,c(2:7)],as.numeric)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
demo$Regimetype= recode(demo$Regimetype,
'Full democracy'='4FullDemocracy',
'Flawed democracy'='3FlawedDemocracy',
'Hybrid regime'='2Hybrid regime',
'Authoritarian'='1Authoritarian')
# poner numero delante, ayuda a crear una ordinal
demo$Regimetype=as.ordered(demo$Regimetype)
round(cor(demo[,c(2:7)]),2)
## Score Electoral Functioning
## Score 1.00 0.93 0.92
## Electoral 0.93 1.00 0.80
## Functioning 0.92 0.80 1.00
## Politicalparticipation 0.85 0.75 0.72
## Politicalculture 0.73 0.52 0.68
## Civilliberties 0.96 0.91 0.86
## Politicalparticipation Politicalculture
## Score 0.85 0.73
## Electoral 0.75 0.52
## Functioning 0.72 0.68
## Politicalparticipation 1.00 0.57
## Politicalculture 0.57 1.00
## Civilliberties 0.78 0.64
## Civilliberties
## Score 0.96
## Electoral 0.91
## Functioning 0.86
## Politicalparticipation 0.78
## Politicalculture 0.64
## Civilliberties 1.00
Con las data guardada de los ejercicios anteriores, añada a los datos de la data sobre felicidad la data de democracia. Hágalo de tal manera que pueda detectar qué paises no tienen coincidencia de nombres. Luego de responder la pregunta que proponemos, quédese con las filas sin valores perdidos.
celu$Country=trimws(celu$Country,whitespace = "[\\h\\v]")
demo$Country=trimws(demo$Country,whitespace = "[\\h\\v]")
feli$Country_Region=trimws(feli$Country_Region,whitespace = "[\\h\\v]")
felidemo=merge(feli,demo,by.x = 'Country_Region',by.y = 'Country',all.x = T, all.y = T)
head(felidemo[!complete.cases(felidemo),])
## Country_Region happy GDP_per_capita Social_support
## 14 Belize 5.956 0.807 1.101
## 27 Cape Verde NA NA NA
## 33 Comoros NA NA NA
## 34 Congo (Brazzaville) 4.559 0.682 0.811
## 35 Congo (Kinshasa) 4.245 0.069 1.136
## 38 Cuba NA NA NA
## Healthy_life_expectancy Freedom_to_make_life_choices Generosity
## 14 0.474 0.593 0.183
## 27 NA NA NA
## 33 NA NA NA
## 34 0.343 0.514 0.091
## 35 0.204 0.312 0.197
## 38 NA NA NA
## Perceptions_of_corruption Score Electoral Functioning
## 14 0.089 NA NA NA
## 27 NA 7.88 9.17 7.86
## 33 NA 3.71 4.33 2.21
## 34 0.077 NA NA NA
## 35 0.052 NA NA NA
## 38 NA 3.00 1.08 3.57
## Politicalparticipation Politicalculture Civilliberties
## 14 NA NA NA
## 27 6.67 6.88 8.82
## 33 4.44 3.75 3.82
## 34 NA NA NA
## 35 NA NA NA
## 38 3.33 4.38 2.65
## Regimetype
## 14 <NA>
## 27 3FlawedDemocracy
## 33 1Authoritarian
## 34 <NA>
## 35 <NA>
## 38 1Authoritarian
felidemo=felidemo[complete.cases(felidemo),]
Con las data limpia de los ejercicios anteriores, añada a los datos que unió en la seccion anterior la data sobre uso celular. Sólo junte los casos de paises con coincidencias plenas (caso por defecto). GUARDE LA DATA INTEGRADA RESULTANTE.
felidemocelu=merge(felidemo,celu,by.x = "Country_Region",by.y = "Country")
names(felidemocelu)
## [1] "Country_Region" "happy"
## [3] "GDP_per_capita" "Social_support"
## [5] "Healthy_life_expectancy" "Freedom_to_make_life_choices"
## [7] "Generosity" "Perceptions_of_corruption"
## [9] "Score" "Electoral"
## [11] "Functioning" "Politicalparticipation"
## [13] "Politicalculture" "Civilliberties"
## [15] "Regimetype" "Telephonesmobilecellular"
## [17] "DateofInformation"
round(cor(felidemocelu[,c(2,9,16)]),2)
## happy Score Telephonesmobilecellular
## happy 1.00 0.63 -0.04
## Score 0.63 1.00 -0.05
## Telephonesmobilecellular -0.04 -0.05 1.00
plot(felidemocelu[,c(2,9,16)])
summary(felidemocelu)
## Country_Region happy GDP_per_capita Social_support
## Length:125 Min. :2.905 Min. :0.0910 Min. :0.372
## Class :character 1st Qu.:4.500 1st Qu.:0.7150 1st Qu.:1.094
## Mode :character Median :5.483 Median :0.9890 Median :1.314
## Mean :5.473 Mean :0.9397 Mean :1.248
## 3rd Qu.:6.322 3rd Qu.:1.2100 3rd Qu.:1.474
## Max. :7.632 Max. :1.6490 Max. :1.644
## Healthy_life_expectancy Freedom_to_make_life_choices Generosity
## Min. :0.0480 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.4850 1st Qu.:0.3630 1st Qu.:0.1010
## Median :0.6750 Median :0.5090 Median :0.1630
## Mean :0.6329 Mean :0.4636 Mean :0.1787
## 3rd Qu.:0.7990 3rd Qu.:0.5850 3rd Qu.:0.2530
## Max. :1.0300 Max. :0.7240 Max. :0.4840
## Perceptions_of_corruption Score Electoral
## Min. :0.0000 Min. :1.430 Min. : 0.000
## 1st Qu.:0.0510 1st Qu.:4.060 1st Qu.: 3.580
## Median :0.0880 Median :6.190 Median : 7.830
## Mean :0.1186 Mean :5.907 Mean : 6.518
## 3rd Qu.:0.1440 3rd Qu.:7.500 3rd Qu.: 9.170
## Max. :0.4570 Max. :9.870 Max. :10.000
## Functioning Politicalparticipation Politicalculture
## Min. :0.000 Min. : 1.67 Min. : 2.500
## 1st Qu.:3.570 1st Qu.: 4.44 1st Qu.: 4.380
## Median :5.360 Median : 5.56 Median : 5.630
## Mean :5.373 Mean : 5.60 Mean : 5.792
## 3rd Qu.:7.140 3rd Qu.: 6.67 3rd Qu.: 6.880
## Max. :9.640 Max. :10.00 Max. :10.000
## Civilliberties Regimetype Telephonesmobilecellular
## Min. : 0.000 1Authoritarian :31 Min. :4.107e+05
## 1st Qu.: 4.120 2Hybrid regime :28 1st Qu.:5.921e+06
## Median : 6.470 3FlawedDemocracy:46 Median :1.244e+07
## Mean : 6.257 4FullDemocracy :20 Mean :5.834e+07
## 3rd Qu.: 8.530 3rd Qu.:4.021e+07
## Max. :10.000 Max. :1.474e+09
## DateofInformation
## Min. :2017
## 1st Qu.:2017
## Median :2017
## Mean :2017
## 3rd Qu.:2017
## Max. :2017
Con la data final, responda:
linealData=felidemocelu[,c('Score','happy','Perceptions_of_corruption','Telephonesmobilecellular')]
rLineal=lm(Score~.,data=linealData)
summary(rLineal)
##
## Call:
## lm(formula = Score ~ ., data = linealData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.0064 -0.8173 0.4272 1.1039 4.0418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.527e-01 7.459e-01 -0.473 0.637
## happy 1.137e+00 1.445e-01 7.870 1.69e-12 ***
## Perceptions_of_corruption 4.153e-01 1.593e+00 0.261 0.795
## Telephonesmobilecellular -2.583e-10 8.363e-10 -0.309 0.758
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.636 on 121 degrees of freedom
## Multiple R-squared: 0.3963, Adjusted R-squared: 0.3814
## F-statistic: 26.48 on 3 and 121 DF, p-value: 3.075e-13
library(betareg)
betaData=linealData
betaData$performance=linealData$Score/10
betaData$Score=NULL
rBeta=betareg(performance~.,data=betaData)
summary(rBeta)
##
## Call:
## betareg(formula = performance ~ ., data = betaData)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -2.8851 -0.5021 0.1806 0.6813 2.4348
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.278e+00 3.161e-01 -7.205 5.79e-13 ***
## happy 4.745e-01 6.135e-02 7.734 1.04e-14 ***
## Perceptions_of_corruption 6.165e-01 6.844e-01 0.901 0.368
## Telephonesmobilecellular -1.247e-10 3.398e-10 -0.367 0.714
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 8.1455 0.9812 8.301 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 60.8 on 5 Df
## Pseudo R-squared: 0.4513
## Number of iterations: 14 (BFGS) + 5 (Fisher scoring)
Si consideramos a un país “FELIZ” si su indice de felicidad es mayor o igual al tercer cuartil del score mundial,podemos concluir de un analisis estadistico que si usamos a ‘Generosity’ y al ‘Score’ de democracia como predictores, si el primero aumentase en un punto, la probabilidad de ser feliz se eleva en 6.89 %.
binaData=felidemocelu[,c(2,7,9)]
q3=summary(binaData$happy)[5]
binaData$pass=ifelse(binaData$happy>=q3,2,1)
binaData$happy=NULL
rBina=glm(factor(pass)~.,data=binaData,family = 'binomial')
summary(rBina)
##
## Call:
## glm(formula = factor(pass) ~ ., family = "binomial", data = binaData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5451 -0.5882 -0.2295 0.3305 3.5241
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.7325 1.5824 -5.518 3.42e-08 ***
## Generosity 6.8937 2.6988 2.554 0.0106 *
## Score 0.9582 0.2075 4.617 3.89e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 142.208 on 124 degrees of freedom
## Residual deviance: 87.111 on 122 degrees of freedom
## AIC: 93.111
##
## Number of Fisher Scoring iterations: 6
library(margins)
mrBina=margins(rBina)
interpretar:
mrBina
## Average marginal effects
## glm(formula = factor(pass) ~ ., family = "binomial", data = binaData)
## Generosity Score
## 0.7512 0.1044
summary(mrBina)
## factor AME SE z p lower upper
## Generosity 0.7512 0.2604 2.8852 0.0039 0.2409 1.2616
## Score 0.1044 0.0150 6.9439 0.0000 0.0749 0.1339
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
base= ggplot(summary(mrBina),aes(x=factor, y=AME)) + geom_point()
base + geom_errorbar(aes(ymin=lower, ymax=upper))