Análisis Exploratorio

library(readxl)

PBI_LPI <- read_excel("PBI_LPI.xls")
PBI_LPI
## # A tibble: 160 × 14
##        N PBIPEA     Agr    Ind Comercio Exp_Mer Exp_ByS Exp_Comb   Exp_MM
##    <dbl>  <dbl>   <dbl>  <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1     1  6276. 0.210   0.113     0.366 0.00901  0.0690  0       0.000101
##  2     2 28603. 0.199   0.0566    0.558 0.0515   0.289   0.00576 0.00454 
##  3     3 52937. 0.123   0.0561    0.485 0.0475   0.210   0.0447  0.000120
##  4     4 15749. 0       0         0.418 0.146    0.300   0       0       
##  5     5 45039. 0.0635  0.136     0.205 0.0652   0.126   0.00165 0.00218 
##  6     6 20792. 0.164   0.103     0.480 0.0614   0.331   0.00235 0.0233  
##  7     7 89515. 0.0243  0.0607    0.322 0.169    0.193   0.0467  0.0535  
##  8     8 90843. 0.0110  0.162     0.793 0.369    0.523   0.00630 0.0118  
##  9     9 33202. 0.056   0.049     0.572 0.0796   0.464   0.0696  0.00129 
## 10    10 40889. 0.00880 0.0316    0.265 0.0500   0.358   0       0       
## # ℹ 150 more rows
## # ℹ 5 more variables: Prod_oil <dbl>, RRNN <dbl>, Tierras_Agr <dbl>,
## #   Exp_H_Tech <dbl>, LPI <dbl>
plot (PBI_LPI)

library(car)
## Loading required package: carData
PBI_LPI_1 <- PBI_LPI[ ,c(2,3,4,5,6,7,8,9,10,11,12,13,14)]
PBI_LPI_1
## # A tibble: 160 × 13
##    PBIPEA     Agr    Ind Comercio Exp_Mer Exp_ByS Exp_Comb   Exp_MM Prod_oil
##     <dbl>   <dbl>  <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl>
##  1  6276. 0.210   0.113     0.366 0.00901  0.0690  0       0.000101      0  
##  2 28603. 0.199   0.0566    0.558 0.0515   0.289   0.00576 0.00454     171. 
##  3 52937. 0.123   0.0561    0.485 0.0475   0.210   0.0447  0.000120   1129. 
##  4 15749. 0       0         0.418 0.146    0.300   0       0          1491. 
##  5 45039. 0.0635  0.136     0.205 0.0652   0.126   0.00165 0.00218     259. 
##  6 20792. 0.164   0.103     0.480 0.0614   0.331   0.00235 0.0233        0  
##  7 89515. 0.0243  0.0607    0.322 0.169    0.193   0.0467  0.0535      227. 
##  8 90843. 0.0110  0.162     0.793 0.369    0.523   0.00630 0.0118       33.4
##  9 33202. 0.056   0.049     0.572 0.0796   0.464   0.0696  0.00129    1678. 
## 10 40889. 0.00880 0.0316    0.265 0.0500   0.358   0       0             0  
## # ℹ 150 more rows
## # ℹ 4 more variables: RRNN <dbl>, Tierras_Agr <dbl>, Exp_H_Tech <dbl>,
## #   LPI <dbl>
scatterplotMatrix(PBI_LPI_1)

library(corrplot)
## corrplot 0.95 loaded
M <- cor(PBI_LPI_1)
corrplot(M)

library(psych)
## 
## Attaching package: 'psych'
## The following object is masked from 'package:car':
## 
##     logit
corPlot(PBI_LPI_1)

## Componentes

PC1 <- princomp(PBI_LPI_1)
PC1
## Call:
## princomp(x = PBI_LPI_1)
## 
## Standard deviations:
##       Comp.1       Comp.2       Comp.3       Comp.4       Comp.5       Comp.6 
## 3.874850e+04 1.430167e+03 4.528068e-01 3.510142e-01 2.052192e-01 1.389599e-01 
##       Comp.7       Comp.8       Comp.9      Comp.10      Comp.11      Comp.12 
## 9.436348e-02 7.346306e-02 5.800053e-02 5.002169e-02 2.268443e-02 1.699327e-02 
##      Comp.13 
## 1.401994e-02 
## 
##  13  variables and  160 observations.
plot(PC1)

summary(PC1)
## Importance of components:
##                              Comp.1       Comp.2       Comp.3       Comp.4
## Standard deviation     3.874850e+04 1.430167e+03 4.528068e-01 3.510142e-01
## Proportion of Variance 9.986396e-01 1.360419e-03 1.363719e-10 8.194992e-11
## Cumulative Proportion  9.986396e-01 1.000000e+00 1.000000e+00 1.000000e+00
##                              Comp.5       Comp.6       Comp.7       Comp.8
## Standard deviation     2.052192e-01 1.389599e-01 9.436348e-02 7.346306e-02
## Proportion of Variance 2.801141e-11 1.284334e-11 5.922526e-12 3.589526e-12
## Cumulative Proportion  1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
##                              Comp.9      Comp.10      Comp.11      Comp.12
## Standard deviation     5.800053e-02 5.002169e-02 2.268443e-02 1.699327e-02
## Proportion of Variance 2.237500e-12 1.664240e-12 3.422590e-13 1.920671e-13
## Cumulative Proportion  1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
##                             Comp.13
## Standard deviation     1.401994e-02
## Proportion of Variance 1.307348e-13
## Cumulative Proportion  1.000000e+00
biplot(PC1)
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped

acp2 <-PC1$scores[ ,1:2]
plot(acp2)

Clusters

library(cluster)

agp3 = agnes(acp2,method="average")
plot(agp3)

agpcut <- cutree(agp3,3)
par(mfrow=c(1,1))
plot(acp2,col=agpcut)

Clusters Clara

plot(clara(PBI_LPI_1,6))

modelo <- clara(PBI_LPI_1, k = 6)
modelo$clustering
##   [1] 1 2 3 2 4 2 5 5 4 4 5 1 4 5 1 2 2 4 4 4 6 4 1 1 1 1 5 1 3 2 2 1 1 2 4 1 3
##  [38] 3 3 5 1 4 2 4 2 1 3 1 2 5 5 3 1 2 5 1 3 2 1 1 2 1 1 5 3 3 2 2 3 3 6 5 5 2
##  [75] 3 4 4 1 3 4 6 1 1 3 4 1 1 3 6 1 1 3 2 1 5 2 4 4 2 4 4 2 1 4 1 5 3 1 1 2 6
## [112] 5 2 4 1 2 2 2 3 3 6 3 3 1 1 6 1 4 1 6 3 3 1 1 4 5 2 5 5 1 1 1 2 1 3 4 3 4
## [149] 1 2 5 5 5 4 2 4 1 1 1 1
clusters <- split(1:nrow(PBI_LPI_1), modelo$clustering)
clusters
## $`1`
##  [1]   1  12  15  23  24  25  26  28  32  33  36  41  46  48  53  56  59  60  62
## [20]  63  78  82  83  86  87  90  91  94 103 105 108 109 115 124 125 127 129 133
## [39] 134 140 141 142 144 149 157 158 159 160
## 
## $`2`
##  [1]   2   4   6  16  17  30  31  34  43  45  49  54  58  61  67  68  74  93  96
## [20]  99 102 110 113 116 117 118 137 143 150 155
## 
## $`3`
##  [1]   3  29  37  38  39  47  52  57  65  66  69  70  75  79  84  88  92 107 119
## [20] 120 122 123 131 132 145 147
## 
## $`4`
##  [1]   5   9  10  13  18  19  20  22  35  42  44  76  77  80  85  97  98 100 101
## [20] 104 114 128 135 146 148 154 156
## 
## $`5`
##  [1]   7   8  11  14  27  40  50  51  55  64  72  73  95 106 112 136 138 139 151
## [20] 152 153
## 
## $`6`
## [1]  21  71  81  89 111 121 126 130
PBI_LPI_cluster<- PBI_LPI
PBI_LPI_cluster$cluster <- modelo$clustering
PBI_LPI_cluster
## # A tibble: 160 × 15
##        N PBIPEA     Agr    Ind Comercio Exp_Mer Exp_ByS Exp_Comb   Exp_MM
##    <dbl>  <dbl>   <dbl>  <dbl>    <dbl>   <dbl>   <dbl>    <dbl>    <dbl>
##  1     1  6276. 0.210   0.113     0.366 0.00901  0.0690  0       0.000101
##  2     2 28603. 0.199   0.0566    0.558 0.0515   0.289   0.00576 0.00454 
##  3     3 52937. 0.123   0.0561    0.485 0.0475   0.210   0.0447  0.000120
##  4     4 15749. 0       0         0.418 0.146    0.300   0       0       
##  5     5 45039. 0.0635  0.136     0.205 0.0652   0.126   0.00165 0.00218 
##  6     6 20792. 0.164   0.103     0.480 0.0614   0.331   0.00235 0.0233  
##  7     7 89515. 0.0243  0.0607    0.322 0.169    0.193   0.0467  0.0535  
##  8     8 90843. 0.0110  0.162     0.793 0.369    0.523   0.00630 0.0118  
##  9     9 33202. 0.056   0.049     0.572 0.0796   0.464   0.0696  0.00129 
## 10    10 40889. 0.00880 0.0316    0.265 0.0500   0.358   0       0       
## # ℹ 150 more rows
## # ℹ 6 more variables: Prod_oil <dbl>, RRNN <dbl>, Tierras_Agr <dbl>,
## #   Exp_H_Tech <dbl>, LPI <dbl>, cluster <int>
library(openxlsx)

write.xlsx(PBI_LPI_cluster, file = "PBI_LPI_cluster")

RED NEURONAL

library(neuralnet)  # regression

library(nnet) # classification 

library(NeuralNetTools)

library(plyr)
normalize<-function(x){
  return ( (x-min(x))/(max(x)-min(x)))
}

PBI_LPI_norm<-as.data.frame(lapply(PBI_LPI_1,FUN=normalize))
summary(PBI_LPI_norm)
##      PBIPEA             Agr               Ind            Comercio     
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.03572   1st Qu.:0.03331   1st Qu.:0.1884   1st Qu.:0.1191  
##  Median :0.13322   Median :0.10551   Median :0.3406   Median :0.1621  
##  Mean   :0.18355   Mean   :0.17648   Mean   :0.3332   Mean   :0.1881  
##  3rd Qu.:0.28131   3rd Qu.:0.27452   3rd Qu.:0.4560   3rd Qu.:0.2189  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.0000   Max.   :1.0000  
##     Exp_Mer           Exp_ByS          Exp_Comb             Exp_MM       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000e+00   Min.   :0.00000  
##  1st Qu.:0.04445   1st Qu.:0.0946   1st Qu.:1.360e-06   1st Qu.:0.00123  
##  Median :0.07775   Median :0.1454   Median :1.306e-02   Median :0.01673  
##  Mean   :0.11938   Mean   :0.1752   Mean   :9.673e-02   Mean   :0.05765  
##  3rd Qu.:0.14587   3rd Qu.:0.2088   3rd Qu.:8.600e-02   3rd Qu.:0.04817  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.000e+00   Max.   :1.00000  
##     Prod_oil              RRNN          Tierras_Agr       Exp_H_Tech      
##  Min.   :0.000e+00   Min.   :0.00000   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.:0.000e+00   1st Qu.:0.01106   1st Qu.:0.3061   1st Qu.:0.000000  
##  Median :8.173e-05   Median :0.04794   Median :0.5063   Median :0.008377  
##  Mean   :3.789e-02   Mean   :0.13455   Mean   :0.4991   Mean   :0.048017  
##  3rd Qu.:7.798e-03   3rd Qu.:0.21329   3rd Qu.:0.7103   3rd Qu.:0.046164  
##  Max.   :1.000e+00   Max.   :1.00000   Max.   :1.0000   Max.   :1.000000  
##       LPI        
##  Min.   :0.0000  
##  1st Qu.:0.2962  
##  Median :0.4038  
##  Mean   :0.4644  
##  3rd Qu.:0.5977  
##  Max.   :1.0000
indice <- sample(2, nrow(PBI_LPI_norm), replace = TRUE, prob = c(0.7,0.3))
PBI_LPI_train <- PBI_LPI_1[indice==1,]
PBI_LPI_test  <- PBI_LPI_1[indice==2,]
library(neuralnet)


PBI_LPI_model <- neuralnet(LPI ~ Agr + Ind + Comercio + Exp_Mer + Exp_ByS + Exp_Comb + Exp_MM + Prod_oil + RRNN + Tierras_Agr + Exp_H_Tech , data = PBI_LPI_train)

str(PBI_LPI_model)
## List of 14
##  $ call               : language neuralnet(formula = LPI ~ Agr + Ind + Comercio + Exp_Mer + Exp_ByS + Exp_Comb +      Exp_MM + Prod_oil + RRNN + T| __truncated__
##  $ response           : num [1:116, 1] 2.13 2.51 2.16 2.93 2.49 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:116] "1" "2" "3" "4" ...
##   .. ..$ : chr "LPI"
##  $ covariate          : num [1:116, 1:11] 0.2097 0.1991 0 0.0635 0.1639 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:11] "Agr" "Ind" "Comercio" "Exp_Mer" ...
##  $ model.list         :List of 2
##   ..$ response : chr "LPI"
##   ..$ variables: chr [1:11] "Agr" "Ind" "Comercio" "Exp_Mer" ...
##  $ err.fct            :function (x, y)  
##   ..- attr(*, "type")= chr "sse"
##  $ act.fct            :function (x)  
##   ..- attr(*, "type")= chr "logistic"
##  $ linear.output      : logi TRUE
##  $ data               :'data.frame': 116 obs. of  13 variables:
##   ..$ PBIPEA     : num [1:116] 6276 28603 15749 45039 20792 ...
##   ..$ Agr        : num [1:116] 0.2097 0.1991 0 0.0635 0.1639 ...
##   ..$ Ind        : num [1:116] 0.1131 0.0566 0 0.136 0.1028 ...
##   ..$ Comercio   : num [1:116] 0.366 0.558 0.418 0.205 0.48 ...
##   ..$ Exp_Mer    : num [1:116] 0.00901 0.05151 0.14606 0.06522 0.06142 ...
##   ..$ Exp_ByS    : num [1:116] 0.069 0.289 0.3 0.126 0.331 ...
##   ..$ Exp_Comb   : num [1:116] 0 0.00576 0 0.00165 0.00235 ...
##   ..$ Exp_MM     : num [1:116] 0.000101 0.004538 0 0.002181 0.023324 ...
##   ..$ Prod_oil   : num [1:116] 0 171 1491 259 0 ...
##   ..$ RRNN       : num [1:116] 0.00598 0.01973 0.13217 0.01302 0.04827 ...
##   ..$ Tierras_Agr: num [1:116] 0.581 0.429 0.475 0.543 0.589 ...
##   ..$ Exp_H_Tech : num [1:116] 0 0.000334 0 0.005735 0.003648 ...
##   ..$ LPI        : num [1:116] 2.13 2.51 2.16 2.93 2.49 ...
##  $ exclude            : NULL
##  $ net.result         :List of 1
##   ..$ : num [1:116, 1] 2.37 2.37 2.45 2.87 2.42 ...
##  $ weights            :List of 1
##   ..$ :List of 2
##   .. ..$ : num [1:12, 1] -2.27 -22.52 18.79 -4.12 8.5 ...
##   .. ..$ : num [1:2, 1] 2.37 1.51
##  $ generalized.weights:List of 1
##   ..$ : num [1:116, 1:11] 0.0264 0.0139 0.5067 1.4066 0.3229 ...
##  $ startweights       :List of 1
##   ..$ :List of 2
##   .. ..$ : num [1:12, 1] -0.4337 0.4647 0.0253 0.6412 -2.2035 ...
##   .. ..$ : num [1:2, 1] -0.8486 0.0625
##  $ result.matrix      : num [1:17, 1] 4.5789 0.0041 1808 -2.2728 -22.5195 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:17] "error" "reached.threshold" "steps" "Intercept.to.1layhid1" ...
##   .. ..$ : NULL
##  - attr(*, "class")= chr "nn"
plot(PBI_LPI_model, rep = "best")

model_results <- compute(PBI_LPI_model,PBI_LPI_test[1:13])
predicted_LPI <- model_results$net.result
cor(predicted_LPI,PBI_LPI_test$LPI)
##           [,1]
## [1,] 0.7829385
predicted_LPI
##           [,1]
##  [1,] 2.370268
##  [2,] 2.374159
##  [3,] 2.588066
##  [4,] 3.843029
##  [5,] 2.469517
##  [6,] 2.370408
##  [7,] 2.545683
##  [8,] 3.808853
##  [9,] 2.508531
## [10,] 2.405733
## [11,] 2.443978
## [12,] 2.429153
## [13,] 2.456763
## [14,] 2.618534
## [15,] 3.441217
## [16,] 2.369351
## [17,] 2.516894
## [18,] 2.775076
## [19,] 2.567271
## [20,] 3.493072
## [21,] 2.456106
## [22,] 2.747431
## [23,] 3.861237
## [24,] 2.369089
## [25,] 2.368942
## [26,] 2.743455
## [27,] 2.458317
## [28,] 2.872181
## [29,] 2.378721
## [30,] 2.373238
## [31,] 2.588420
## [32,] 2.405902
## [33,] 3.509258
## [34,] 2.895118
## [35,] 3.352965
## [36,] 2.368888
## [37,] 2.379451
## [38,] 2.368591
## [39,] 2.377482
## [40,] 2.390365
## [41,] 2.368663
## [42,] 2.368776
## [43,] 3.436184
## [44,] 3.370929
PBI_LPI_test$LPI
##  [1] 2.455828 2.254286 2.759056 3.180760 1.937576 2.359471 2.585589 3.026809
##  [9] 2.753272 2.331307 2.045015 2.630513 3.003740 2.097716 3.814949 2.380741
## [17] 2.466700 2.896920 2.882424 3.695022 2.389892 2.622612 3.820868 2.364158
## [25] 2.027063 2.527753 2.738210 3.390797 2.478651 2.663394 2.929829 2.300339
## [33] 2.791576 2.931236 3.353241 2.354357 2.327711 1.785748 2.389656 1.642313
## [41] 2.255333 2.528847 3.980922 3.966106

Mejorar el modelo

PBI_LPI_model2 <- neuralnet(LPI ~ Agr + Ind + Comercio + Exp_Mer + Exp_ByS + Exp_Comb + Exp_MM + Prod_oil + RRNN + Tierras_Agr + Exp_H_Tech , data = PBI_LPI_train, hidden = c(2,4))

plot(PBI_LPI_model2 ,rep = "best")

model_results2 <- compute(PBI_LPI_model2,PBI_LPI_test[1:13])
predicted_LPI2 <- model_results2$net.result
cor(predicted_LPI2,PBI_LPI_test$LPI)
##           [,1]
## [1,] 0.3678225
predicted_LPI2
##           [,1]
##  [1,] 2.251338
##  [2,] 2.262804
##  [3,] 3.795465
##  [4,] 3.763839
##  [5,] 2.482329
##  [6,] 2.298121
##  [7,] 2.925812
##  [8,] 3.358604
##  [9,] 2.540178
## [10,] 3.229286
## [11,] 3.687699
## [12,] 2.591372
## [13,] 2.586871
## [14,] 3.241967
## [15,] 3.218938
## [16,] 2.272910
## [17,] 3.353442
## [18,] 2.737070
## [19,] 2.952647
## [20,] 3.290148
## [21,] 2.528378
## [22,] 2.917347
## [23,] 3.727752
## [24,] 2.306503
## [25,] 2.263656
## [26,] 2.978076
## [27,] 2.441635
## [28,] 2.829389
## [29,] 2.357855
## [30,] 2.648245
## [31,] 2.942413
## [32,] 2.317951
## [33,] 3.113339
## [34,] 3.012232
## [35,] 3.091225
## [36,] 2.356732
## [37,] 2.361321
## [38,] 2.266459
## [39,] 3.335811
## [40,] 3.635318
## [41,] 2.423627
## [42,] 2.354919
## [43,] 3.144276
## [44,] 2.957650
PBI_LPI_test$LPI
##  [1] 2.455828 2.254286 2.759056 3.180760 1.937576 2.359471 2.585589 3.026809
##  [9] 2.753272 2.331307 2.045015 2.630513 3.003740 2.097716 3.814949 2.380741
## [17] 2.466700 2.896920 2.882424 3.695022 2.389892 2.622612 3.820868 2.364158
## [25] 2.027063 2.527753 2.738210 3.390797 2.478651 2.663394 2.929829 2.300339
## [33] 2.791576 2.931236 3.353241 2.354357 2.327711 1.785748 2.389656 1.642313
## [41] 2.255333 2.528847 3.980922 3.966106