Análisis descriptivo de los datos

# Print the structure of the raw data frame: column names, types and first values.
str(datos)
## Classes 'tbl_df', 'tbl' and 'data.frame':    186 obs. of  31 variables:
##  $ Country Name              : chr  "Afghanistan" "Albania" "Algeria" "Angola" ...
##  $ Region                    : chr  "Asia-Pacific" "Europe" "Middle East / North Africa" "Sub-Saharan Africa" ...
##  $ World Rank                : num  163 65 172 165 156 33 5 30 68 90 ...
##  $ Region Rank               : num  40 30 14 41 26 19 4 17 15 19 ...
##  $ 2017 Score                : num  48.9 64.4 46.5 48.5 50.4 ...
##  $ Property Rights           : num  12.6 54 38.2 36.4 32.4 ...
##  $ Judical Effectiveness     : num  28.4 28.5 29.6 19.8 39.6 ...
##  $ Government Integrity      : num  27.5 39.7 31.7 12.8 38.2 ...
##  $ Tax Burden                : num  91.6 86.9 81.1 87.7 62.6 ...
##  $ Gov't Spending            : num  79.9 72.5 51 58.6 54.6 ...
##  $ Fiscal Health             : num  97.3 51.5 19.8 70.7 56.4 ...
##  $ Business Freedom          : num  54.2 79.3 62.1 58.5 57.3 78.5 89.3 76.9 71.5 68.5 ...
##  $ Labor Freedom             : num  59.9 50.7 49.5 40.4 46.1 ...
##  $ Monetary Freedom          : num  69.3 81.4 67 70.6 50.9 ...
##  $ Trade Freedom             : num  66 87.7 63.3 56.7 66.7 ...
##  $ Investment Freedom        : num  0 70 35 30 50 80 80 90 55 50 ...
##  $ Financial Freedom         : num  0 70 30 40 50 70 90 70 50 60 ...
##  $ Tariff Rate (%)           : num  7 1.1 8.4 11.7 6.6 2.4 1.9 1.5 5.3 19.7 ...
##  $ Income Tax Rate (%)       : num  20 23 35 17 35 26 45 50 25 0 ...
##  $ Corporate Tax Rate (%)    : num  20 15 23 30 35 20 30 25 20 0 ...
##  $ Tax Burden % of GDP       : num  6.5 23.6 11.7 6.5 35.9 23.5 27.5 43 14.2 16.9 ...
##  $ Gov't Expenditure % of GDP: num  27.1 30 44.4 28.9 43.9 ...
##  $ Population (Millions)     : num  32 2.8 39.5 25.1 42.4 3.3 23.9 8.6 9.5 0.4 ...
##  $ GDP (Billions, PPP)       : num  62.3 32.7 578.7 184.4 972 ...
##  $ GDP Growth Rate (%)       : num  1.5 2.6 3.7 3 1.2 3 2.5 0.9 1.1 0.5 ...
##  $ 5 Year GDP Growth Rate (%): num  5.4 1.9 3.3 4.7 2.7 4.3 2.7 1 2.4 0.9 ...
##  $ GDP per Capita (PPP)      : num  1947 11301 14504 7344 22554 ...
##  $ Unemployment (%)          : num  9.6 17.3 10.5 7.6 6.7 16.3 6.3 5.7 4.7 14.4 ...
##  $ Inflation (%)             : num  -1.5 1.9 4.8 10.3 26.5 3.7 1.5 0.8 4 1.9 ...
##  $ FDI Inflow (Millions)     : num  58 1003 -587 8681 11655 ...
##  $ Public Debt (% of GDP)    : num  6.8 71.9 8.7 62.3 56.5 46.6 36.8 86.2 36.1 65.7 ...
# Drop every country that has a missing value in any column.
datos <- datos[complete.cases(datos), ]

# Working table: the 2017 score and GDP per capita (PPP), with countries as
# row names and the two columns renamed to the Spanish labels used below.
datos1 <- datos[, c("2017 Score", "GDP per Capita (PPP)"), drop = FALSE]
rownames(datos1) <- datos[["Country Name"]]
names(datos1) <- c("Índice de libertad económica", "Renta per cápita")
summary(datos1)
##  Índice de libertad económica Renta per cápita  
##  Min.   :26.96                Min.   :   629.7  
##  1st Qu.:53.98                1st Qu.:  4324.4  
##  Median :61.10                Median : 12134.8  
##  Mean   :61.51                Mean   : 19836.9  
##  3rd Qu.:68.58                3rd Qu.: 27965.9  
##  Max.   :89.82                Max.   :132098.7
# Two panels side by side: one histogram per variable, with the bar counts
# printed above each bar (labels = TRUE).
par(mfrow = c(1, 2))
hist(
  datos1$`Renta per cápita`,
  main = "", xlab = "Renta per cápita", ylab = "Frecuencia",
  col = "gold", border = "tomato1", labels = TRUE
)
hist(
  datos1$`Índice de libertad económica`,
  main = "", xlab = "Índice de libertad económica", ylab = "Frecuencia",
  col = "gold", border = "tomato1", labels = TRUE
)

# Boxplot of income per capita; the text marks the maximum (132098) as Qatar.
boxplot(
  datos1$`Renta per cápita`,
  main = "", ylab = "Renta per cápita ($)",
  col = "gold", border = "black", labels = TRUE
)
text(x = 0.82, y = 132098, labels = "Qatar >-", cex = 1)

# Boxplot of the index; the text marks the minimum (26.96) as Venezuela.
boxplot(
  datos1$`Índice de libertad económica`,
  main = "", ylab = "Índice de libertad económica (%)",
  col = "gold", border = "black", labels = TRUE
)
text(x = 0.75, y = 26.96, labels = "Venezuela >-", cex = 1)

# Back to a single plotting panel for the figures that follow.
par(mfrow = c(1, 1))
# basicPlot: draw the base scatter plot of income per capita (y) against the
# economic-freedom index (x), reading both columns from the global `datos1`.
#
# Takes no arguments and is called for its side effect of drawing on the
# current graphics device. The axis limits are fixed (x: 0-100, y: 0-135000),
# so every call produces the same frame for the overlays added afterwards.
basicPlot <- function() {
  plot(datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`,
       data = datos1, bty = "n", lwd = 2,
       main = "", col = "#00526D", ylim = c(0, 135000), xlim = c(0, 100),
       xlab = "Indice de libertad económica",
       ylab = "Renta per cápita")
  # Redraw both axes in grey. The original drew side 1 twice, which left the
  # y axis (side 2) in the default colour.
  axis(side = 1, col = "grey")
  axis(side = 2, col = "grey")
}
basicPlot()

Análisis cluster

# Standardise both variables, compute Euclidean distances between countries
# and build the hierarchical tree with the "ward.D" agglomeration method.
datosz <- scale(datos1)
d <- dist(datosz, method = "euclidean")
fit <- hclust(d, method = "ward.D")

# Dendrogram of the Ward clustering.
plot(
  fit,
  cex = .6, xlab = "", ylab = "Distancia entre grupos",
  sub = "Cluster jerárquico por el método de Ward"
)

# Number of clusters cut from the trees and used to size the colour palette.
Numgrupos <- 3
library(scales)
library(RColorBrewer)

# Redraw the Ward dendrogram and box the Numgrupos clusters on it.
plot(
  fit,
  cex = .6, xlab = "", ylab = "Distancia entre grupos",
  sub = "Cluster jerárquico por el método de Ward "
)
rect.hclust(fit, k = Numgrupos, border = brewer.pal(Numgrupos, "Dark2"))

# Store each country's Ward cluster as a factor column.
grupos <- cutree(fit, k = Numgrupos)
datos1$GRUPOward <- factor(grupos)

# Scatter plot coloured by Ward cluster (palette index = cluster number),
# with every point labelled by its row name (the country).
palette(brewer.pal(Numgrupos, "Dark2"))
plot(
  datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`,
  data = datos1,
  xlab = "Índice de libertad económica", ylab = "GDP",
  col = alpha(GRUPOward, 1),
  pch = 19, las = 1
)
text(datos1, rownames(datos1), pos = 4, cex = .5)

# Same distances, average linkage; box three clusters on the dendrogram.
fit2 <- hclust(d, method = "average")
plot(
  fit2,
  cex = .6, xlab = "", ylab = "Distancia entre grupos",
  sub = "Cluster jerárquico por el método average"
)
rect.hclust(fit2, k = 3, border = brewer.pal(Numgrupos, "Dark2"))

# Store the average-linkage membership next to the Ward one.
datos1$GRUPOaverage <- factor(cutree(fit2, k = 3))
colnames(datos1) <- c(
  "Índice de libertad económica", "Renta per cápita",
  "GRUPOward", "GRUPOaverage"
)

# Scatter plot coloured by average-linkage cluster, labelled by country.
plot(
  datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`,
  data = datos1,
  xlab = "Índice de libertad económica", ylab = "GDP",
  col = alpha(GRUPOaverage, 1),
  pch = 19, las = 1
)
text(datos1, rownames(datos1), pos = 4, cex = .5)

# Same distances, complete linkage.
fit3 <- hclust(d, method = "complete")
plot(fit3, cex = .6, xlab = "", ylab = "Distancia entre grupos",
     sub = "Cluster jerárquico por el método complete")
# Box the three clusters of the tree that was just plotted. The original
# passed `fit` (the Ward tree), so the boxes did not match this dendrogram.
rect.hclust(fit3, k = 3, border = c("red", "gold", "green"))

# Store the complete-linkage membership as a factor column.
grupos <- cutree(fit3, k = 3)
datos1$GRUPOcomplete <- factor(grupos)
colnames(datos1) <- c("Índice de libertad económica", "Renta per cápita",
                      "GRUPOward", "GRUPOaverage", "GRUPOcomplete")

# Scatter plot coloured by complete-linkage cluster, labelled by country.
plot(datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`,
     xlab = "Índice de libertad económica", ylab = "GDP",
     col = alpha(GRUPOcomplete, 1), data = datos1,
     pch = 19, las = 1)
text(datos1, rownames(datos1), pos = 4, cex = .5)

palette(brewer.pal(Numgrupos, "Dark2"))

# Run k-means on the two numeric variables only. The original passed the whole
# of `datos1`, which by now also holds the GRUPOward / GRUPOaverage /
# GRUPOcomplete factor columns, so earlier cluster labels entered the distance.
# NOTE(review): unlike the hierarchical fits (built on the scaled `datosz`),
# this runs on unscaled values, so income per capita dominates the distance;
# no seed is set either, so cluster numbering may change between runs.
Data.km <- kmeans(
  datos1[, c("Índice de libertad económica", "Renta per cápita")],
  Numgrupos
)

# Append the k-means membership; data.frame() alters the non-syntactic column
# names, so they are restored explicitly right after.
datos1 <- data.frame(datos1, GRUPO = factor(Data.km$cluster))
colnames(datos1) <- c("Índice de libertad económica", "Renta per cápita",
                      "GRUPOward", "GRUPOaverage", "GRUPOcomplete", "GRUPOkmeans")

# Scatter plot coloured by k-means cluster, labelled by country, with the
# cluster centres overlaid as large filled squares.
plot(datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`,
     xlab = "Índice de libertad económica", ylab = "GDP",
     col = alpha(GRUPOkmeans, 1), data = datos1,
     pch = 19, las = 1)
text(datos1, rownames(datos1), pos = 4, cex = .5)
points(Data.km$centers, col = 1:Numgrupos, pch = 15, cex = 3)

Análisis de la varianza

# One-way ANOVA of income per capita across the Ward clusters.
modelo.aov <- aov(
  datos1$`Renta per cápita` ~ GRUPOward,
  data = datos1
)
summary(modelo.aov)
##              Df    Sum Sq   Mean Sq F value Pr(>F)    
## GRUPOward     2 3.909e+10 1.955e+10   80.49 <2e-16 ***
## Residuals   169 4.104e+10 2.428e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Per-cluster boxplots of both variables (Ward groups), side by side.
par(mfrow = c(1, 2))
boxplot(
  datos1$`Renta per cápita` ~ GRUPOward,
  data = datos1,
  main = "Boxplot GDP", col = 1:3, las = 1
)
boxplot(
  datos1$`Índice de libertad económica` ~ GRUPOward,
  data = datos1,
  main = "Boxplot de libertad economica", col = 1:3, las = 1
)

# Pairwise Tukey comparisons of mean income between Ward clusters.
TukeyHSD(modelo.aov)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = datos1$`Renta per cápita` ~ GRUPOward, data = datos1)
## 
## $GRUPOward
##          diff       lwr       upr     p adj
## 2-1  1479.959 -5715.365  8675.282 0.8778828
## 3-1 31914.132 24814.626 39013.639 0.0000000
## 3-2 30434.174 23865.678 37002.670 0.0000000
# One-way ANOVA of income per capita across the average-linkage clusters.
modelo.aov2 <- aov(
  datos1$`Renta per cápita` ~ GRUPOaverage,
  data = datos1
)
summary(modelo.aov2)
##               Df    Sum Sq   Mean Sq F value Pr(>F)    
## GRUPOaverage   2 3.461e+10 1.730e+10   64.24 <2e-16 ***
## Residuals    169 4.552e+10 2.694e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Per-cluster boxplots of both variables (average-linkage groups).
boxplot(
  datos1$`Renta per cápita` ~ GRUPOaverage,
  data = datos1,
  main = "Boxplot GDP", col = 1:3, las = 1
)
boxplot(
  datos1$`Índice de libertad económica` ~ GRUPOaverage,
  data = datos1,
  main = "Boxplot de libertad economica", col = 1:3, las = 1
)

# Pairwise Tukey comparisons of mean income between average-linkage clusters.
TukeyHSD(modelo.aov2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = datos1$`Renta per cápita` ~ GRUPOaverage, data = datos1)
## 
## $GRUPOaverage
##            diff        lwr       upr     p adj
## 2-1  71787.2851   56809.22  86765.35 0.0000000
## 3-1   -244.0006  -39170.81  38682.81 0.9998789
## 3-2 -72031.2857 -113519.50 -30543.08 0.0001847
# One-way ANOVA of income per capita across the complete-linkage clusters.
modelo.aov3 <- aov(
  datos1$`Renta per cápita` ~ GRUPOcomplete,
  data = datos1
)
summary(modelo.aov3)
##                Df    Sum Sq   Mean Sq F value Pr(>F)    
## GRUPOcomplete   2 3.994e+10 1.997e+10   83.98 <2e-16 ***
## Residuals     169 4.019e+10 2.378e+08                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Per-cluster boxplots of both variables (complete-linkage groups).
boxplot(
  datos1$`Renta per cápita` ~ GRUPOcomplete,
  data = datos1,
  main = "Boxplot GDP", col = 1:3, las = 1
)
boxplot(
  datos1$`Índice de libertad económica` ~ GRUPOcomplete,
  data = datos1,
  main = "Boxplot de libertad economica", col = 1:3, las = 1
)

# Pairwise Tukey comparisons of mean income between complete-linkage clusters.
TukeyHSD(modelo.aov3)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = datos1$`Renta per cápita` ~ GRUPOcomplete, data = datos1)
## 
## $GRUPOcomplete
##         diff      lwr       upr p adj
## 2-1 17170.02 11252.90  23087.13     0
## 3-1 97018.74 78168.48 115869.00     0
## 3-2 79848.73 61288.14  98409.31     0
# One-way ANOVA of the economic-freedom index across the complete-linkage clusters.
modelo.aov4 <- aov(
  datos1$`Índice de libertad económica` ~ GRUPOcomplete,
  data = datos1
)
summary(modelo.aov4)
##                Df Sum Sq Mean Sq F value Pr(>F)    
## GRUPOcomplete   2  10150    5075   122.7 <2e-16 ***
## Residuals     169   6988      41                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise Tukey comparisons of the mean index between complete-linkage clusters.
TukeyHSD(modelo.aov4)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = datos1$`Índice de libertad económica` ~ GRUPOcomplete, data = datos1)
## 
## $GRUPOcomplete
##         diff       lwr      upr     p adj
## 2-1 15.52866 13.061396 17.99592 0.0000000
## 3-1 26.10530 18.245286 33.96531 0.0000000
## 3-2 10.57664  2.837413 18.31586 0.0042063
# One-way ANOVA of income per capita across the k-means clusters.
modelo.aov5 <- aov(
  datos1$`Renta per cápita` ~ GRUPOkmeans,
  data = datos1
)
summary(modelo.aov5)
##              Df    Sum Sq   Mean Sq F value Pr(>F)    
## GRUPOkmeans   2 6.752e+10 3.376e+10   452.6 <2e-16 ***
## Residuals   169 1.261e+10 7.460e+07                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# One-way ANOVA of the economic-freedom index across the k-means clusters.
modelo.aov6 <- aov(
  datos1$`Índice de libertad económica` ~ GRUPOkmeans,
  data = datos1
)
summary(modelo.aov6)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## GRUPOkmeans   2   6880    3440   56.68 <2e-16 ***
## Residuals   169  10257      61                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pirate plot of income per capita by k-means cluster.
pirateplot(datos1$`Renta per cápita` ~ GRUPOkmeans,
           hdi.o = 1, ylab = "GDP",
           point.o = 1,
           point.pch = 1,
           main = "Boxplot GDP", data = datos1)

# Pirate plot of the economic-freedom index by k-means cluster.
# The y axis shows the index, so it is labelled "ILE" to match the title;
# the original reused ylab = "GDP" from the plot above.
pirateplot(datos1$`Índice de libertad económica` ~ GRUPOkmeans,
           hdi.o = 1, ylab = "ILE", point.o = 1, point.pch = 1,
           main = "Boxplot ILE", data = datos1)

# Pairwise Tukey comparisons of mean income between k-means clusters.
TukeyHSD(modelo.aov5)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = datos1$`Renta per cápita` ~ GRUPOkmeans, data = datos1)
## 
## $GRUPOkmeans
##          diff       lwr       upr p adj
## 2-1 -49348.28 -57184.49 -41512.07     0
## 3-1 -79187.25 -86646.64 -71727.87     0
## 3-2 -29838.98 -33413.04 -26264.92     0
# Pairwise Tukey comparisons of the mean index between k-means clusters.
TukeyHSD(modelo.aov6)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = datos1$`Índice de libertad económica` ~ GRUPOkmeans, data = datos1)
## 
## $GRUPOkmeans
##           diff       lwr        upr  p adj
## 2-1  -3.941705 -11.00987   3.126460 0.3868
## 3-1 -16.939359 -23.66763 -10.211090 0.0000
## 3-2 -12.997655 -16.22141  -9.773898 0.0000

Comparación de grupos significativos

# Render tablaILE as a table. NOTE(review): tablaILE is built outside this
# excerpt; presumably per-group summaries of the index — confirm at its source.
kable(tablaILE)
GRUPO1 GRUPO2 GRUPO3
Min. :26.96 Min. :55.00 Min. :70.67
1st Qu.:48.92 1st Qu.:61.09 1st Qu.:72.50
Median :52.35 Median :65.18 Median :74.52
Mean :50.97 Mean :66.50 Mean :77.07
3rd Qu.:53.98 3rd Qu.:70.63 3rd Qu.:79.10
Max. :56.41 Max. :89.82 Max. :88.58
# Render tablaGDP as a table. NOTE(review): tablaGDP is built outside this
# excerpt; presumably per-group summaries of income per capita — confirm at its source.
kable(tablaGDP)
GRUPO1 GRUPO2 GRUPO3
Min. : 629.7 Min. : 1462 Min. : 85253
1st Qu.: 1762.8 1st Qu.: 8528 1st Qu.: 94915
Median : 3737.3 Median :18868 Median : 98561
Mean : 6599.8 Mean :23770 Mean :103619
3rd Qu.:10440.4 3rd Qu.:35931 3rd Qu.:107265
Max. :31757.7 Max. :79587 Max. :132099
# Rename the complete-linkage clusters 1, 2, 3 to "Bajo", "Medio", "Alto".
levels(datos1$GRUPOcomplete) <- c("Bajo", "Medio", "Alto")

# Bin the index into three bands. seq(20, 100, by = 26) yields the breaks
# 20, 46, 72, 98, so the bands are [20, 46], (46, 72] and (72, 98]; a value
# above 98 would fall outside every band.
divisionILE <- cut(
  datos1$`Índice de libertad económica`,
  breaks = seq(20, 100, by = 26),
  labels = c(
    "Economia intervencionista",
    "Economia mixta",
    "Economia de mercado"
  ),
  include.lowest = TRUE
)

# Plot data: index band, income per capita and complete-linkage cluster.
datosgrafico <- data.frame(
  Indice_libertad_economica = divisionILE,
  Renta_per_capita = datos1$`Renta per cápita`,
  Cluster_por_el_metodo_complete = datos1$GRUPOcomplete
)

# Number of countries in each band.
table(divisionILE)
## divisionILE
## Economia intervencionista            Economia mixta 
##                         5                       137 
##       Economia de mercado 
##                        30
# Income per capita by index band, points coloured by complete-linkage cluster.
ggplot(
  datosgrafico,
  aes(
    Indice_libertad_economica,
    Renta_per_capita,
    col = Cluster_por_el_metodo_complete
  )
) +
  geom_point()

Regresiones

# Single plotting panel for the regression figures.
par(mfrow = c(1, 1))

# Simple linear regression of income per capita on the index.
lsq.mod <- lm(
  datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`
)
summary(lsq.mod)
## 
## Call:
## lm(formula = datos1$`Renta per cápita` ~ datos1$`Índice de libertad económica`)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -30023 -10326  -3879   6685  96390 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                           -64344.3     8000.6  -8.042 1.44e-13
## datos1$`Índice de libertad económica`   1368.7      128.4  10.660  < 2e-16
##                                          
## (Intercept)                           ***
## datos1$`Índice de libertad económica` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16810 on 170 degrees of freedom
## Multiple R-squared:  0.4006, Adjusted R-squared:  0.3971 
## F-statistic: 113.6 on 1 and 170 DF,  p-value: < 2.2e-16
# Base scatter plot with the fitted straight line, a legend and the R^2 label.
basicPlot()
abline(lsq.mod, col = "orange", lwd = 3)
legend(
  x = "topleft",
  legend = c("Paises", "Recta de regresión lineal"),
  col = c("#00526D", "orange"),
  pch = c(1, NA), lty = c(NA, 1), lwd = c(2, 2),
  bty = "n"
)
text(x = 32.5, y = 90000, labels = expression(R^2 == 0.4), cex = 1)

# Second-degree polynomial regression of income per capita on the index.
re <- lm(
  datos1$`Renta per cápita` ~ poly(datos1$`Índice de libertad económica`, 2)
)
summary(re)
## 
## Call:
## lm(formula = datos1$`Renta per cápita` ~ poly(datos1$`Índice de libertad económica`, 
##     2))
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -33337  -8191  -3598   6099  95645 
## 
## Coefficients:
##                                                 Estimate Std. Error
## (Intercept)                                        19837       1221
## poly(datos1$`Índice de libertad económica`, 2)1   179172      16011
## poly(datos1$`Índice de libertad económica`, 2)2    68599      16011
##                                                 t value Pr(>|t|)    
## (Intercept)                                      16.249  < 2e-16 ***
## poly(datos1$`Índice de libertad económica`, 2)1  11.190  < 2e-16 ***
## poly(datos1$`Índice de libertad económica`, 2)2   4.284 3.07e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16010 on 169 degrees of freedom
## Multiple R-squared:  0.4593, Adjusted R-squared:  0.4529 
## F-statistic: 71.79 on 2 and 169 DF,  p-value: < 2.2e-16
# Base scatter plot with the quadratic fit. The fitted values are reordered by
# the index so the curve is drawn left to right.
basicPlot()
legend(
  x = "topleft",
  legend = c("Paises", "Modelo polinomico^2"),
  col = c("#00526D", "orange"),
  pch = c(1, NA), lty = c(NA, 1), lwd = c(2, 2),
  bty = "n"
)
lines(
  sort(datos1$`Índice de libertad económica`),
  fitted(re)[order(datos1$`Índice de libertad económica`)],
  col = "orange", type = "l", lwd = 3
)
text(x = 32.5, y = 90000, labels = expression(R^2 == 0.45), cex = 1)

# Log-linear regression: log of income per capita on the index.
re2 <- lm(
  log(datos1$`Renta per cápita`) ~ datos1$`Índice de libertad económica`
)
summary(re2)
## 
## Call:
## lm(formula = log(datos1$`Renta per cápita`) ~ datos1$`Índice de libertad económica`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24894 -0.56726  0.01879  0.63949  3.15734 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)
## (Intercept)                           4.449063   0.447960   9.932   <2e-16
## datos1$`Índice de libertad económica` 0.078450   0.007189  10.912   <2e-16
##                                          
## (Intercept)                           ***
## datos1$`Índice de libertad económica` ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9411 on 170 degrees of freedom
## Multiple R-squared:  0.4119, Adjusted R-squared:  0.4085 
## F-statistic: 119.1 on 1 and 170 DF,  p-value: < 2.2e-16
# Base scatter plot with the log-linear fit; fitted values are exponentiated
# to return to the income scale and reordered by the index.
basicPlot()
legend(
  x = "topleft",
  legend = c("Paises", "Modelo exponencial"),
  col = c("#00526D", "orange"),
  pch = c(1, NA), lty = c(NA, 1), lwd = c(2, 2),
  bty = "n"
)
lines(
  sort(datos1$`Índice de libertad económica`),
  exp(fitted(re2))[order(datos1$`Índice de libertad económica`)],
  col = "orange", type = "l", lwd = 3
)

# All three fits on one base plot: linear (green), quadratic (orange) and
# log-linear (steelblue).
basicPlot()
abline(lsq.mod, col = "green", lwd = 3)
lines(
  sort(datos1$`Índice de libertad económica`),
  fitted(re)[order(datos1$`Índice de libertad económica`)],
  col = "orange", type = "l", lwd = 3
)
lines(
  sort(datos1$`Índice de libertad económica`),
  exp(fitted(re2))[order(datos1$`Índice de libertad económica`)],
  col = "steelblue", type = "l", lwd = 3
)
legend(
  x = "topleft",
  legend = c(
    "Paises", "Modelo lineal", "Modelo polinomico ^2", "Modelo exponencial"
  ),
  col = c("#00526D", "green", "orange", "steelblue"),
  pch = c(1, NA, NA, NA), lty = c(NA, 1, 1, 1), lwd = c(2, 2),
  bty = "n"
)