ggplot(data=datosdes, aes(Año,Tc_pib_per_capita))+
labs(y="Tasa de crecimiento del PIB per cápita real español")+
geom_line(linetype=1,aes(x=Año,y=Tc_pib_per_capita),
size=1.3,color="steelblue")+
geom_point(size=2,data=datosdes
,aes(x=Año,y=Tc_pib_per_capita))+
scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +
scale_y_continuous(breaks = seq(-0.06, 0.1, by = 0.01))+
geom_hline(yintercept = 0,alpha=0.8,size=1)+
theme(axis.text.x = element_text(angle = 90, vjust=0.5),
text = element_text(size=12))

ggplot(data=datosdes)+labs(y="Tasa de variación FBCF (UMN) a precios actuales ",x="Año")+
theme(panel.background = element_rect(fill = 'grey93', colour = 'black'))+
annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = -25, ymax = 20,
alpha = .5)+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = -25, ymax = 20,
alpha = .5)+
geom_hline(yintercept = 0,alpha=0.7)+
geom_line(linetype=1, size=1.8,color="steelblue",
aes(x=Año,y=fbcf[2:34,3]*100))+
scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +
scale_y_continuous(breaks = seq(-25, 20, by = 5))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5),
text = element_text(size=12))

train.c <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.train <- modelo1[train.c, ]
datos.test <- modelo1[-train.c, ]
table(datos.train$pib)
##
## 0 1
## 18 5
table(datos.test$pib)
##
## 0 1
## 8 2
LOGIT
depstep <- glm(pib~tvahorro,data =datos.train, family = binomial(link = "logit"))
summary(depstep)
##
## Call:
## glm(formula = pib ~ tvahorro, family = binomial(link = "logit"),
## data = datos.train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.79783 -0.29508 -0.10826 -0.00923 1.59755
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.4772 1.0064 0.474 0.6354
## tvahorro -60.9674 28.3570 -2.150 0.0316 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 24.0850 on 22 degrees of freedom
## Residual deviance: 9.4229 on 21 degrees of freedom
## AIC: 13.423
##
## Number of Fisher Scoring iterations: 7
pro<-predict (depstep, datos.test,type = "response")
prdBln1 <- ifelse(pro > 0.5, 1, 0)
cnfmtrx1 <- table(prd=prdBln1, act=datos.test$pib)
confusionMatrix(cnfmtrx1)
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 8 1
## 1 0 1
##
## Accuracy : 0.9
## 95% CI : (0.555, 0.9975)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.3758
##
## Kappa : 0.6154
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 1.0000
## Specificity : 0.5000
## Pos Pred Value : 0.8889
## Neg Pred Value : 1.0000
## Prevalence : 0.8000
## Detection Rate : 0.8000
## Detection Prevalence : 0.9000
## Balanced Accuracy : 0.7500
##
## 'Positive' Class : 0
##
x<- as.numeric(datos.test$pib)
pmea<-mean(abs(x-pro))*100
pmea
## [1] 12.02898
for(i in 1:100){
train.cb <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.trainb <- modelo1[train.cb, ]
datos.testb <- modelo1[-train.cb, ]
br <- glm(pib~tvahorro,data =datos.trainb, family = binomial(link = "logit"))
br.<-predict (br, datos.testb,type = "response")
br1<-c(br1,br.)
año<-c(año,as.numeric(row.names(datos.testb))+1983)
pmeabuclelogit1<-mean(abs((as.numeric(datos.testb$pib))-br.))*100
prdBlnbucle1 <- ifelse(br. > 0.5, 1, 0)
scorebucle1<-mean(datos.testb$pib == prdBlnbucle1)
vectorlogit1[i]<- pmeabuclelogit1
newvectorlogit1[i]<-scorebucle1
ea<-c(ea, prdBlnbucle1)
ea1<-c(ea1,datos.testb[,1])
}
año<-año[-1]
ea<-ea[-1]
ea1<-ea1[-1]
br1<-br1[-1]
confusionMatrix(table(prd=ea, act=ea1))
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 749 65
## 1 41 145
##
## Accuracy : 0.894
## 95% CI : (0.8732, 0.9124)
## No Information Rate : 0.79
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.6665
## Mcnemar's Test P-Value : 0.02549
##
## Sensitivity : 0.9481
## Specificity : 0.6905
## Pos Pred Value : 0.9201
## Neg Pred Value : 0.7796
## Prevalence : 0.7900
## Detection Rate : 0.7490
## Detection Prevalence : 0.8140
## Balanced Accuracy : 0.8193
##
## 'Positive' Class : 0
##
mean(vectorlogit1)
## [1] 12.53793
g<-ggplot(data=datosgrafico1, aes(Año,Probabilidades))+scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))
for(i in 1:100){
g<-g+geom_point(shape=1,stroke=2,colour = 'steelblue',size=3.5,aes_string(año[(i*10-9):(i*10)],br1[(i*10-9):(i*10)]*100))
}
g

for (i in 1:33){
trainloo<-modelo1[-i,]
testloo<- modelo1[i,]
ppro <- glm(pib~tvahorro,data =trainloo, family = binomial(link = "probit"))
ppro1<-predict (ppro, testloo,type = "response")
probap<-c(probap,ppro1)
testloo <- as.numeric(modelo1[i,1])
prdloopro <- ifelse( ppro1> 0.5, 1, 0)
scorevectorpro[i]<-mean(testloo == prdloopro)
vectorloopro[i]<-mean(abs(testloo-ppro1))*100
propro[i]<-ppro1
}
mean(scorevectorpro)
## [1] 0.8787879
mean(vectorloopro)
## [1] 13.67422
ggplot(data=datosgrafico, aes(Año,Probabilidades))+annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
geom_line(linetype=1, colour = 'steelblue',size=1.8,aes(datosgrafico$Año,probap*100))+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
geom_point(shape=21,fill="white",stroke=3,size=3.5,colour = 'black',aes(datosgrafico$Año,probap*100))+
scale_x_continuous(breaks = round(seq(1984, 2016, by = 1))) +
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))

medialo<-rep(mean(as.numeric(modelo1$pib)),33)
tfglcompleto <- glm(pib~tvahorro,data =modelo1, family = binomial(link = "logit"))
tfglcompleto1<-predict (tfglcompleto, modelo1,type = "response")
sslc<- sum((tfglcompleto1-medialo)^2)
sstlc<- sum(((as.numeric(modelo1$pib))-medialo)^2)
rlc<-(sslc/sstlc)
rlc
## [1] 0.6501186
PROBIT
probit <- glm(pib~tvahorro,data =datos.trainb,
family = binomial(link = "probit"))
summary(probit)
##
## Call:
## glm(formula = pib ~ tvahorro, family = binomial(link = "probit"),
## data = datos.trainb)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.86766 -0.37703 -0.09617 -0.00007 1.46058
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.4166 0.5575 0.747 0.4549
## tvahorro -34.4754 14.6539 -2.353 0.0186 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 24.085 on 22 degrees of freedom
## Residual deviance: 11.649 on 21 degrees of freedom
## AIC: 15.649
##
## Number of Fisher Scoring iterations: 8
l1probit<-predict (probit, datos.testb,type = "response")
prdBlnp <- ifelse(l1probit > 0.5, 1, 0)
cnfmtrxp <- table(prd=prdBlnp, act=datos.test$pib)
confusionMatrix(cnfmtrxp)
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 6 2
## 1 2 0
##
## Accuracy : 0.6
## 95% CI : (0.2624, 0.8784)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.9672
##
## Kappa : -0.25
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.750
## Specificity : 0.000
## Pos Pred Value : 0.750
## Neg Pred Value : 0.000
## Prevalence : 0.800
## Detection Rate : 0.600
## Detection Prevalence : 0.800
## Balanced Accuracy : 0.375
##
## 'Positive' Class : 0
##
pmeap<-mean(abs(x-l1probit))*100
pmeap
## [1] 40.50727
for(i in 1:100){
train.cb <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.trainb <- modelo1[train.cb, ]
datos.testb <- modelo1[-train.cb, ]
finalprobit <- glm(pib~tvahorro,data =datos.trainb,
family = binomial(link = "probit"))
final1probit<-predict (finalprobit, datos.testb,type = "response")
brpr<-c(brpr,final1probit)
añoprobit<-c(añoprobit,as.numeric(row.names(datos.testb))+1983)
prdBlfinal <- ifelse(final1probit > 0.5, 1, 0)
scorefinalprobit<-mean(datos.testb$pib == prdBlfinal)
pmeabucleprobit<-mean(abs((as.numeric(datos.testb$pib))-final1probit))*100
newvectorprobit[i]<-scorefinalprobit
vectorprobit[i]<- pmeabucleprobit
elpr<-c(elpr, prdBlfinal)
elpr1<-c(elpr1,as.numeric(datos.testb[,1]))
}
primeropr <- table(prd=elpr, act=elpr1)
confusionMatrix(primeropr)
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 748 66
## 1 49 137
##
## Accuracy : 0.885
## 95% CI : (0.8636, 0.9041)
## No Information Rate : 0.797
## P-Value [Acc > NIR] : 1.07e-13
##
## Kappa : 0.6332
## Mcnemar's Test P-Value : 0.1357
##
## Sensitivity : 0.9385
## Specificity : 0.6749
## Pos Pred Value : 0.9189
## Neg Pred Value : 0.7366
## Prevalence : 0.7970
## Detection Rate : 0.7480
## Detection Prevalence : 0.8140
## Balanced Accuracy : 0.8067
##
## 'Positive' Class : 0
##
mean(vectorprobit)
## [1] 13.45202
p<-ggplot(data=datosgrafico1, aes(Año,Probabilidades))+scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))
for(i in 1:100){
p<-p+geom_point(shape=1,stroke=2,colour = 'steelblue',size=3.5,aes_string(añoprobit[(i*10-9):(i*10)],brpr[(i*10-9):(i*10)]*100))
}
p

for (i in 1:33){
trainloo<-modelo1[-i,]
testloo<- modelo1[i,]
ppro <- glm(pib~tvahorro,data =trainloo, family = binomial(link = "probit"))
ppro1<-predict (ppro, testloo,type = "response")
probap<-c(probap,ppro1)
testloo <- as.numeric(modelo1[i,1])
prdloopro <- ifelse( ppro1> 0.5, 1, 0)
scorevectorpro[i]<-mean(testloo == prdloopro)
vectorloopro[i]<-mean(abs(testloo-ppro1))*100
propro[i]<-ppro1
}
mean(scorevectorpro)
## [1] 0.8787879
mean(vectorloopro)
## [1] 13.67422
ggplot(data=datosgrafico, aes(Año,Probabilidades))+annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
geom_line(linetype=1, colour = 'steelblue',size=1.8,aes(datosgrafico$Año,propro*100))+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
geom_point(shape=21,fill="white",stroke=3,size=4.5,colour = 'black',aes(datosgrafico$Año,propro*100))+
scale_x_continuous(breaks = round(seq(1984, 2016, by = 1))) +
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))

proc <- glm(pib~tvahorro,data =modelo1, family = binomial(link = "probit"))
summary(proc)
##
## Call:
## glm(formula = pib ~ tvahorro, family = binomial(link = "probit"),
## data = modelo1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.88838 -0.21035 -0.04140 -0.00001 1.54935
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3860 0.5234 0.738 0.46077
## tvahorro -38.2484 14.3619 -2.663 0.00774 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34.106 on 32 degrees of freedom
## Residual deviance: 12.594 on 31 degrees of freedom
## AIC: 16.594
##
## Number of Fisher Scoring iterations: 9
proc1<-predict (proc, modelo1,type = "response")
sslp<- sum((proc1-medialo)^2)
sstlc<- sum(((as.numeric(modelo1$pib))-medialo)^2)
rlcp<-(sslp/sstlc)
rlcp
## [1] 0.6473693
DISCRIMINANT ANALYSIS
LDA <- lda(pib ~negocios, data =datos.train, method = "mve")
LDA1<- predict(LDA, datos.test)
meLDA<-mean(abs(x-(LDA1$posterior[,2])))*100
meLDA
## [1] 1.533611
for(i in 1:100){
train.cb <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.trainb <- modelo1[train.cb, ]
datos.testb <- modelo1[-train.cb, ]
modelomve <- lda(pib ~negocios, data =datos.trainb, method = "mve")
mve <- predict(modelomve, newdata = datos.testb)
problda<-c(problda,mve$posterior[,2])
mveif <- ifelse(mve$posterior[,2] > 0.5, 1, 0)
prelda3[i]<-mean(datos.testb$pib == mveif)
mepalda3[i]<-mean(abs((as.numeric(datos.testb$pib))-mve$posterior[,2]))*100
añolda<-c(añolda,as.numeric(row.names(datos.testb))+1983)
elprlda<-c(elprlda, mveif)
elprlda1<-c(elprlda1,as.numeric(datos.testb[,1]))
}
primerolda <- table(prd=elprlda, act=elprlda1)
confusionMatrix(primerolda)
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 790 26
## 1 0 184
##
## Accuracy : 0.974
## 95% CI : (0.9621, 0.9829)
## No Information Rate : 0.79
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9179
## Mcnemar's Test P-Value : 9.443e-07
##
## Sensitivity : 1.0000
## Specificity : 0.8762
## Pos Pred Value : 0.9681
## Neg Pred Value : 1.0000
## Prevalence : 0.7900
## Detection Rate : 0.7900
## Detection Prevalence : 0.8160
## Balanced Accuracy : 0.9381
##
## 'Positive' Class : 0
##
mean(mepalda3)
## [1] 5.405337
l<-ggplot(data=datosgrafico1, aes(Año,Probabilidades))+scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))
for(i in 1:100){
l<-l+geom_point(shape=1,stroke=2,colour = 'steelblue',size=3.5,aes_string(añolda[(i*10-9):(i*10)],problda[(i*10-9):(i*10)]*100))
}
l

for (i in 1:33){
trainloo<- modelo1[-i,]
testloo<- modelo1[i,]
modelomveloo <- lda(pib ~negocios, data =trainloo, method = "mve")
mveloo <- predict(modelomveloo, newdata = testloo, type = "response")
testloo <- as.numeric(modelo1[i,1])
testloo <- testloo-1
mveifloo <- ifelse(mveloo$posterior[,2] > 0.5, 1, 0)
prelda3loo[i]<-mean(testloo == mveifloo)
mepalda3loo[i]<-mean(abs(testloo-mveloo$posterior[,2]))*100
probldaloo<-c(probldaloo,mveloo$posterior[,2])
}
mean(prelda3loo)
## [1] 0.03030303
mean(mepalda3loo)
## [1] 97.82513
ggplot(data=datosgrafico, aes(Año,Probabilidades))+annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
geom_line(linetype=1, colour = 'steelblue',size=1.8,aes(datosgrafico$Año,probldaloo*100))+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
geom_point(shape=21,fill="white",stroke=3,size=4.5,colour = 'black',aes(datosgrafico$Año,probldaloo*100))+
scale_x_continuous(breaks = round(seq(1984, 2016, by = 1))) +
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))

LDAc <- lda(pib ~negocios, data =modelo1, method = "mve")
LDAc1<- predict(LDAc, modelo1)
ss<- sum((LDAc1$posterior[,2]-medialo)^2)
sst<- sum(((as.numeric(modelo1$pib))-medialo)^2)
rlda<-(ss/sst)
rlda
## [1] 0.8573345
SUPPORT VECTOR MACHINE
svm <- svm(pib ~ negocios+euribor, data =datos.train, probability=TRUE)
svm1 <- predict(svm, datos.test, probability=TRUE)
for(i in 1:100){
train.cb <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.trainb <- modelo1[train.cb, ]
datos.testb <- modelo1[-train.cb, ]
fitbucle <- svm(pib ~ negocios+euribor, data =datos.trainb, probability=TRUE)
predbucle1 <- predict(fitbucle, datos.testb)
svmif <- ifelse(predbucle1 > 0.5, 1, 0)
pmeasvmbucle<-mean(abs(predbucle1-((as.numeric(datos.testb$pib)))))*100
scorebucle<-mean(datos.testb$pib == svmif)
vectorsvm[i]<- pmeasvmbucle
newvectorsvm[i]<- scorebucle
elSVM<-c(elSVM, svmif)
elSVM1<-c(elSVM1,as.numeric(datos.testb[,1]))
probSVM<-c(probSVM,predbucle1)
añoSVM<-c(añoSVM,as.numeric(row.names(datos.testb))+1983)
}
primeroSVM <- table(prd=elSVM, act=elSVM1)
confusionMatrix(primeroSVM)
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 765 117
## 1 0 118
##
## Accuracy : 0.883
## 95% CI : (0.8614, 0.9023)
## No Information Rate : 0.765
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6068
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.5021
## Pos Pred Value : 0.8673
## Neg Pred Value : 1.0000
## Prevalence : 0.7650
## Detection Rate : 0.7650
## Detection Prevalence : 0.8820
## Balanced Accuracy : 0.7511
##
## 'Positive' Class : 0
##
mean(vectorsvm)
## [1] 15.82514
for (i in 1:33){
trainloo<- modelo1[-i,]
testloo<- modelo1[i,]
fitbucleLOO <- svm(pib ~ euribor+negocios, data =trainloo, probability=TRUE)
predbucleLOO <- predict(fitbucleLOO, testloo, probability=TRUE)
testloo <- as.numeric(modelo1[i,1])
prosvm[i]<-predbucleLOO
vectorLOOsvm[i]<-mean(abs(predbucleLOO-testloo))*100
ifloosvm <- ifelse(predbucleLOO > 0.5, 1, 0)
newvectorLOOsvm[i]<-mean(testloo == ifloosvm)
}
mean( vectorLOOsvm)
## [1] 11.65092
mean(newvectorLOOsvm)
## [1] 0.9393939
ggplot(data=datosgrafico, aes(Año,Probabilidades))+annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
geom_line(linetype=1, colour = 'steelblue',size=1.8,aes(datosgrafico$Año,prosvm*100))+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
geom_point(shape=21,fill="white",stroke=3,size=4.5,colour = 'black',aes(datosgrafico$Año,prosvm*100))+
scale_x_continuous(breaks = round(seq(1984, 2016, by = 1))) +
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))

DECISION TREES
ac <- rpart(pib ~ TVFBC+tvgastobruto, data =datos.train)
ac1<- predict(ac,datos.test)
pemaac<-mean(abs((as.numeric(datos.test$pib))-ac1)*100)
pemaac
## [1] 20
for(i in 1:100){
train.cb <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.trainb <- modelo1[train.cb, ]
datos.testb <- modelo1[-train.cb, ]
acc <- rpart(pib ~ TVFBC+tvgastobruto, data =datos.trainb)
acc1<- predict(acc,datos.testb)
probaac<-c(probaac,acc1)
prd <- ifelse(acc1 > 0.5, 1, 0)
accscore[i]<-mean(datos.testb$pib == prd)
acmepa[i]<-mean(abs(x- acc1))*100
añoac<-c(añoac,as.numeric(row.names(datos.testb))+1983)
probac<-c(probac, prd)
probac1<-c(probac1,as.numeric(datos.testb[,1]))
}
probac<-probac[-1]
añoac<-añoac[-1]
probac1<-probac1[-1]
probaac<-probaac[-1]
primeroac <- table(prd=probac, act=probac1)
confusionMatrix(primeroac)
## Confusion Matrix and Statistics
##
## act
## prd 0 1
## 0 708 58
## 1 83 151
##
## Accuracy : 0.859
## 95% CI : (0.8359, 0.88)
## No Information Rate : 0.791
## P-Value [Acc > NIR] : 2.019e-08
##
## Kappa : 0.5915
## Mcnemar's Test P-Value : 0.04326
##
## Sensitivity : 0.8951
## Specificity : 0.7225
## Pos Pred Value : 0.9243
## Neg Pred Value : 0.6453
## Prevalence : 0.7910
## Detection Rate : 0.7080
## Detection Prevalence : 0.7660
## Balanced Accuracy : 0.8088
##
## 'Positive' Class : 0
##
mean(acmepa)
## [1] 29.55714
ac<-ggplot(data=datosgrafico1, aes(Año,Probabilidades))+scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))
for(i in 1:100){
ac<-ac+geom_point(shape=1,stroke=2,colour = 'steelblue',size=3.5,
aes_string(añoac[(i*10-9):(i*10)],
probaac[(i*10-9):(i*10)]*100))
}
ac

for (i in 1:33){
trainloo<- modelo1[-i,]
testloo<- modelo1[i,]
accc <- rpart(pib ~ TVFBC+tvgastobruto, data =trainloo)
accc1<- predict(accc,testloo)
testloo <- as.numeric(modelo1[i,1])
prdlcc <- ifelse(accc1 > 0.5, 1, 0)
aclooscore[i]<-mean(testloo == prdlcc)
acloomepa[i]<-mean(abs(testloo-accc1))*100
probac[i]<-accc1
}
mean(aclooscore)
## [1] 0.969697
mean(acloomepa)
## [1] 6.060606
ggplot(data=datosgrafico, aes(Año,Probabilidades))+annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
geom_line(linetype=1, colour = 'steelblue',size=1.8,aes(datosgrafico$Año,probac*100))+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
geom_point(shape=21,fill="white",stroke=3,size=4.5,colour = 'black',aes(datosgrafico$Año,probac*100))+
scale_x_continuous(breaks = round(seq(1984, 2016, by = 1))) +
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))

ssac<- sum((probac-medialo)^2)
sstac<- sum(((as.numeric(modelo1$pib))-medialo)^2)
rac<-(ssac/sstac)
rac
## [1] 0.844584
RANDOM FOREST
set.seed(71)
RFcompleto<-randomForest(as.factor(pib) ~ TVFBC+tvgastobruto,importance=TRUE,
proximity=TRUE, data =datos.train)
RFcompleto1 <- predict(RFcompleto, newdata = datos.test, type = 'prob')[,2]
for(i in 1:100){
train.cb <- sample(1:nrow(modelo1), nrow(modelo1)*0.7)
datos.trainb <- modelo1[train.cb, ]
datos.testb <- modelo1[-train.cb, ]
RFbucle<-randomForest(as.factor(pib) ~ TVFBC+tvgastobruto, data =datos.trainb)
fitForestbucle <- predict(RFbucle, newdata = datos.testb, type = 'prob')[, 2]
fitForestprueba <- predict(RFbucle, newdata = datos.testb)
probrfg<-c(probrfg,fitForestbucle)
pmeabuclet<-mean(abs((as.numeric(datos.testb[,1]))-fitForestbucle ))*100
score<-mean(datos.testb$pib == fitForestprueba)
vector[i]<- pmeabuclet
newvector[i]<- score
añorf<-c(añorf,as.numeric(row.names(datos.testb))+1983)
probrf<-c(probrf, as.numeric(fitForestprueba)-1)
probrf1<-c(probrf1,as.numeric(datos.testb[,1])-1)
}
mean(vector)
## [1] 4.795
rf<-ggplot(data=datosgrafico1, aes(Año,Probabilidades))+scale_x_continuous(breaks = seq(1984, 2016, by = 1)) +annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))
for(i in 1:100){
rf<-rf+geom_point(shape=1,stroke=2,colour = 'steelblue',size=3.5,
aes_string(añorf[(i*10-9):(i*10)],
probrfg[(i*10-9):(i*10)]*100))
}
rf

for (i in 1:33){
trainloo<- modelo1[-i,]
testloo<- modelo1[i,]
RFbucle<-randomForest(as.factor(pib) ~ TVFBC+tvgastobruto, data =trainloo)
fitForestloo <- predict(RFbucle, newdata = testloo, type = 'prob')[, 2]
pr <- ifelse(fitForestloo > 0.5, 1, 0)
testloo <- as.numeric(modelo1[i,1])
vectorrfscore[i]<-mean(testloo == pr)
vectorrf[i]<-mean(abs(testloo-fitForestloo))*100
probabilidadesrf[i]<-fitForestloo
}
mean(vectorrf)
## [1] 4.424242
mean(vectorrfscore)
## [1] 1
ggplot(data=datosgrafico, aes(Año,Probabilidades))+annotate("rect", xmin = 1992.5, xmax = 1993.5, ymin = 0, ymax = 100,
alpha = .5)+labs(y="Probabilidades (%)")+
annotate("rect", xmin = 2007.5, xmax = 2013.5, ymin = 0, ymax = 100,
alpha = .5)+
geom_line(linetype=1, colour = 'steelblue',size=1.8,aes(datosgrafico$Año,probabilidadesrf*100))+
theme(panel.background = element_rect(fill = 'grey93'),
panel.grid.minor = element_line(colour="white",size=1),
panel.grid.major = element_line(colour="white",size=1))+
geom_point(shape=21,fill="white",stroke=3,size=4.5,colour = 'black',aes(datosgrafico$Año,probabilidadesrf*100))+
scale_x_continuous(breaks = round(seq(1984, 2016, by = 1))) +
scale_y_continuous(breaks = round(seq(0, 100, by = 10)))+
theme(text = element_text(size=13))+
theme(axis.text.x = element_text(angle = 90, vjust=0.5))

RFcompleto<-randomForest(as.factor(pib) ~ TVFBC+tvgastobruto, data =modelo1)
RFcompleto1 <- predict(RFcompleto, newdata = modelo1, type = 'prob')[, 2]
ssrf<- sum((RFcompleto1-medialo)^2)
sstrf<- sum(((as.numeric(modelo1$pib))-medialo)^2)
rsvm<-(ssrf/sstrf)
rsvm
## [1] 0.9216059
ARIMA
atvahorro<-auto.arima(modelo1$tvahorro)
plot(forecast(atvahorro,h=4))

aconstruccion<-auto.arima(modelo1$Construccion)
plot(forecast(aconstruccion))

anegocios<-auto.arima(modelo1$negocios)
plot(forecast(anegocios))

aeuribor<-auto.arima(modelo1$euribor)
plot(forecast(aeuribor))

atvfbc<-auto.arima(modelo1$tvfbc)
plot(forecast(atvfbc))

atvgastobruto<-auto.arima(modelo1$tvgastobruto)
plot(forecast(atvgastobruto))

ptvahrro<-forecast(atvahorro,h=4)
pconstruccion<-forecast(aconstruccion,h=4)
pnegocios<-forecast(anegocios,h=4)
peuribor<-forecast(aeuribor,h=4)
ptvfbc<-forecast(atvfbc,h=4)
ptvgastobruto<-forecast(atvgastobruto,h=4)
up<-ptvahrro$mean + 0.8416*sqrt(atvahorro$sigma2)
lo<-ptvahrro$mean - 0.8416*sqrt(atvahorro$sigma2)
upc<-pconstruccion$mean + 0.8416*sqrt(aconstruccion$sigma2)
loc<-pconstruccion$mean - 0.8416*sqrt(aconstruccion$sigma2)
upn<-pnegocios$mean + 0.8416*sqrt(anegocios$sigma2)
lon<-pnegocios$mean - 0.8416*sqrt(anegocios$sigma2)
upe<-peuribor$mean + 0.8416*sqrt(aeuribor$sigma2)
loe<-peuribor$mean - 0.8416*sqrt(aeuribor$sigma2)
upp<-ptvfbc$mean + 0.8416*sqrt(atvfbc$sigma2)
lop<-ptvfbc$mean - 0.8416*sqrt(atvfbc$sigma2)
upg<-ptvgastobruto$mean + 0.8416*sqrt(atvgastobruto$sigma2)
log<-ptvgastobruto$mean - 0.8416*sqrt(atvgastobruto$sigma2)
datalogit<-data.frame(modelo1$pib,modelo1$tvahorro,
modelo1$Construccion,modelo1$negocios,modelo1$euribor,
modelo1$tvfbc,modelo1$tvgastobruto)
colnames(datalogit)<-c("pib","tvahorro","construccion","negocios","euribor","tvfbc","tvgastobruto")
prelogit<-glm(pib~tvahorro+construccion,family = binomial(link = "logit"),datalogit)
datalogitp<-data.frame(ptvahrro$mean,pconstruccion$mean,pnegocios$mean,
peuribor$mean,ptvfbc$mean,ptvgastobruto$mean)
colnames(datalogitp)<-c("tvahorro","construccion","negocios","euribor","tvfbc","tvgastobruto")
datalogitpe<-data.frame(lo,loc,lon,
loe,lop,log)
colnames(datalogitpe)<-c("tvahorro","construccion","negocios","euribor","tvfbc","tvgastobruto")
datalogitop<-data.frame(up,upc,upn,upe,upp,upg)
colnames(datalogitop)<-c("tvahorro","construccion","negocios","euribor","tvfbc","tvgastobruto")
prediclogitprobable<-predict(prelogit,newdata = datalogitp,type="response"); prediclogitprobable*100
## 1 2 3 4
## 3.589468 4.122017 4.184879 4.459766
prediclogitpesimista<-predict(prelogit,newdata = datalogitpe,type="response");prediclogitpesimista*100
## 1 2 3 4
## 39.71775 43.20820 43.59609 45.23750
prediclogitoptimista<-predict(prelogit,newdata = datalogitop,type="response");prediclogitoptimista*100
## 1 2 3 4
## 0.2099439 0.2423526 0.2462004 0.2630826
######probit#######
preprobit <- glm(pib~tvahorro,data =datalogit, family = binomial(link = "probit"))
predicprobitprobable<-predict(preprobit,newdata = datalogitp,type="response");predicprobitprobable*100
## 1 2 3 4
## 0.4153453 0.4841371 0.4516590 0.4661914
predicprobitpesimista<-predict(preprobit,newdata = datalogitpe,type="response");predicprobitpesimista*100
## 1 2 3 4
## 14.05275 15.25457 14.69938 14.95036
predicprobitoptimista<-predict(preprobit,newdata = datalogitop,type="response");predicprobitoptimista*100
## 1 2 3 4
## 0.001330360 0.001674475 0.001508510 0.001582008
#####lda####
LDAp <- lda(pib ~negocios, data =datalogit, method = "mve")
ldaprobable<-predict(LDAp, datalogitp); ldaprobable$posterior[,2]*100
## 1 2 3 4
## 0.0003083086 0.1197622942 0.2922833337 0.3340571737
ldappesimista<-predict(LDAp, datalogitpe);ldappesimista$posterior[,2]*100
## 1 2 3 4
## 0.06138178 19.28120883 36.86769556 40.03782460
ldapoptimista<-predict(LDAp, datalogitop);ldapoptimista$posterior[,2]*100
## 1 2 3 4
## 1.547633e-06 6.018924e-04 1.471465e-03 1.682471e-03
####svm####
svmpre <- svm(pib ~ negocios+euribor, data =datalogit, probability=TRUE)
svmp <- predict(svmpre, datalogitp, probability=TRUE);svmp*100
## 1 2 3 4
## 7.294759 4.087522 6.697190 7.407709
svmpe <- predict(svmpre, datalogitpe, probability=TRUE);svmpe*100
## 1 2 3 4
## 7.029029 34.745248 41.746702 42.852690
svmop <- predict(svmpre, datalogitop, probability=TRUE);svmop*100
## 1 2 3 4
## 18.1721526 2.7104780 0.9663035 0.9078932
####arboles#####
acpre <- rpart(pib ~ tvfbc+tvgastobruto, data =datalogit)
acpro<-predict(acpre, datalogitp);acpro*100
## 1 2 3 4
## 0 0 0 0
acpesimista<-predict(acpre, datalogitpe);acpesimista*100
## 1 2 3 4
## 0 0 0 0
acoptimista<-predict(acpre, datalogitop);acoptimista*100
## 1 2 3 4
## 0 0 0 0
####randomforest#####
RFpre<-randomForest(as.factor(pib) ~ tvfbc+tvgastobruto, data =datalogit)
RFpepro <- predict(RFpre, newdata = datalogitp, type = 'prob')[, 2];RFpepro*100
## 1 2 3 4
## 0 0 0 0
RFpesimista <- predict(RFpre, newdata = datalogitpe,
type = 'prob')[, 2];RFpesimista*100
## 1 2 3 4
## 36.2 66.0 66.2 66.4
RFoptimista <- predict(RFpre, newdata = datalogitop,
type = 'prob')[, 2];RFoptimista*100
## 1 2 3 4
## 0 0 0 0