# Caret

# Instalando os pacotes
#install.packages("caret")
#install.packages("randomForest")

# Carregando os pacotes
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(datasets)

# Load the mtcars dataset
data(mtcars)

# caret helper used to split the data into training and test rows
?createDataPartition
## starting httpd help server ...
##  done
# Fix the RNG seed so the random partition (and everything derived from it)
# is reproducible between runs.
# NOTE: the console output pasted as `##` comments in this file presumably
# came from an unseeded run, so the exact numbers will differ.
set.seed(123)
# p = 0.7 requests roughly 70% of the rows for training; list = FALSE makes
# the result usable directly as a row index
split <- createDataPartition(y = mtcars$mpg, p = 0.7, list = FALSE)

# Build the training and test sets from the selected / remaining rows
dados_treino <- mtcars[split, ]
dados_teste <- mtcars[-split, ]

# Model training: open the help page for train() and print the names of all
# model entries returned by getModelInfo()
help("train")
names(getModelInfo())
##   [1] "ada"                 "AdaBag"              "AdaBoost.M1"        
##   [4] "adaboost"            "amdai"               "ANFIS"              
##   [7] "avNNet"              "awnb"                "awtan"              
##  [10] "bag"                 "bagEarth"            "bagEarthGCV"        
##  [13] "bagFDA"              "bagFDAGCV"           "bam"                
##  [16] "bartMachine"         "bayesglm"            "bdk"                
##  [19] "binda"               "blackboost"          "blasso"             
##  [22] "blassoAveraged"      "Boruta"              "bridge"             
##  [25] "brnn"                "BstLm"               "bstSm"              
##  [28] "bstTree"             "C5.0"                "C5.0Cost"           
##  [31] "C5.0Rules"           "C5.0Tree"            "cforest"            
##  [34] "chaid"               "CSimca"              "ctree"              
##  [37] "ctree2"              "cubist"              "dda"                
##  [40] "deepboost"           "DENFIS"              "dnn"                
##  [43] "dwdLinear"           "dwdPoly"             "dwdRadial"          
##  [46] "earth"               "elm"                 "enet"               
##  [49] "enpls.fs"            "enpls"               "evtree"             
##  [52] "extraTrees"          "fda"                 "FH.GBML"            
##  [55] "FIR.DM"              "foba"                "FRBCS.CHI"          
##  [58] "FRBCS.W"             "FS.HGD"              "gam"                
##  [61] "gamboost"            "gamLoess"            "gamSpline"          
##  [64] "gaussprLinear"       "gaussprPoly"         "gaussprRadial"      
##  [67] "gbm"                 "gcvEarth"            "GFS.FR.MOGUL"       
##  [70] "GFS.GCCL"            "GFS.LT.RS"           "GFS.THRIFT"         
##  [73] "glm"                 "glmboost"            "glmnet"             
##  [76] "glmStepAIC"          "gpls"                "hda"                
##  [79] "hdda"                "hdrda"               "HYFIS"              
##  [82] "icr"                 "J48"                 "JRip"               
##  [85] "kernelpls"           "kknn"                "knn"                
##  [88] "krlsPoly"            "krlsRadial"          "lars"               
##  [91] "lars2"               "lasso"               "lda"                
##  [94] "lda2"                "leapBackward"        "leapForward"        
##  [97] "leapSeq"             "Linda"               "lm"                 
## [100] "lmStepAIC"           "LMT"                 "loclda"             
## [103] "logicBag"            "LogitBoost"          "logreg"             
## [106] "lssvmLinear"         "lssvmPoly"           "lssvmRadial"        
## [109] "lvq"                 "M5"                  "M5Rules"            
## [112] "manb"                "mda"                 "Mlda"               
## [115] "mlp"                 "mlpML"               "mlpSGD"             
## [118] "mlpWeightDecay"      "mlpWeightDecayML"    "multinom"           
## [121] "nb"                  "nbDiscrete"          "nbSearch"           
## [124] "neuralnet"           "nnet"                "nnls"               
## [127] "nodeHarvest"         "oblique.tree"        "OneR"               
## [130] "ordinalNet"          "ORFlog"              "ORFpls"             
## [133] "ORFridge"            "ORFsvm"              "ownn"               
## [136] "pam"                 "parRF"               "PART"               
## [139] "partDSA"             "pcaNNet"             "pcr"                
## [142] "pda"                 "pda2"                "penalized"          
## [145] "PenalizedLDA"        "plr"                 "pls"                
## [148] "plsRglm"             "polr"                "ppr"                
## [151] "protoclass"          "pythonKnnReg"        "qda"                
## [154] "QdaCov"              "qrf"                 "qrnn"               
## [157] "randomGLM"           "ranger"              "rbf"                
## [160] "rbfDDA"              "Rborist"             "rda"                
## [163] "relaxo"              "rf"                  "rFerns"             
## [166] "RFlda"               "rfRules"             "ridge"              
## [169] "rlda"                "rlm"                 "rmda"               
## [172] "rocc"                "rotationForest"      "rotationForestCp"   
## [175] "rpart"               "rpart1SE"            "rpart2"             
## [178] "rpartCost"           "rpartScore"          "rqlasso"            
## [181] "rqnc"                "RRF"                 "RRFglobal"          
## [184] "rrlda"               "RSimca"              "rvmLinear"          
## [187] "rvmPoly"             "rvmRadial"           "SBC"                
## [190] "sda"                 "sddaLDA"             "sddaQDA"            
## [193] "sdwd"                "simpls"              "SLAVE"              
## [196] "slda"                "smda"                "snn"                
## [199] "sparseLDA"           "spikeslab"           "spls"               
## [202] "stepLDA"             "stepQDA"             "superpc"            
## [205] "svmBoundrangeString" "svmExpoString"       "svmLinear"          
## [208] "svmLinear2"          "svmLinear3"          "svmLinearWeights"   
## [211] "svmLinearWeights2"   "svmPoly"             "svmRadial"          
## [214] "svmRadialCost"       "svmRadialSigma"      "svmRadialWeights"   
## [217] "svmSpectrumString"   "tan"                 "tanSearch"          
## [220] "treebag"             "vbmpRadial"          "vglmAdjCat"         
## [223] "vglmContRatio"       "vglmCumulative"      "widekernelpls"      
## [226] "WM"                  "wsrf"                "xgbLinear"          
## [229] "xgbTree"             "xyf"
# The seed is reset before every train() call so that all three models are
# evaluated on the same resamples and the random forest fit is reproducible.

# Linear regression
set.seed(123)
modelolm <- train(mpg ~ ., data = dados_treino, method = "lm")

# Generalized linear model. mpg is numeric, so glm runs with its default
# gaussian family: this is NOT a logistic regression, and it reproduces the
# coefficients of the lm fit above
set.seed(123)
modelolm2 <- train(mpg ~ ., data = dados_treino, method = "glm")

# Random forest
set.seed(123)
modelolm3 <- train(mpg ~ ., data = dados_treino, method = "rf")
## Loading required package: randomForest
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Model summary: residuals, coefficient table and fit statistics of the
# linear model
summary(modelolm)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0504 -1.7144  0.1626  1.3853  4.1936 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -30.031897  28.206510  -1.065   0.3064  
## cyl           0.584525   1.055176   0.554   0.5890  
## disp          0.006436   0.021130   0.305   0.7655  
## hp           -0.006903   0.026111  -0.264   0.7956  
## drat          2.363192   2.108162   1.121   0.2826  
## wt           -5.090450   2.574063  -1.978   0.0696 .
## qsec          2.815753   1.230073   2.289   0.0394 *
## vs           -3.121273   2.694013  -1.159   0.2675  
## am            2.186772   2.275734   0.961   0.3541  
## gear          0.795180   1.763481   0.451   0.6595  
## carb          0.337487   0.939505   0.359   0.7252  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.484 on 13 degrees of freedom
## Multiple R-squared:  0.8969, Adjusted R-squared:  0.8175 
## F-statistic: 11.31 on 10 and 13 DF,  p-value: 7.048e-05
# Summary of the glm fit (gaussian family, per the dispersion line in the
# output below)
summary(modelolm2)
## 
## Call:
## NULL
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0504  -1.7144   0.1626   1.3853   4.1936  
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -30.031897  28.206510  -1.065   0.3064  
## cyl           0.584525   1.055176   0.554   0.5890  
## disp          0.006436   0.021130   0.305   0.7655  
## hp           -0.006903   0.026111  -0.264   0.7956  
## drat          2.363192   2.108162   1.121   0.2826  
## wt           -5.090450   2.574063  -1.978   0.0696 .
## qsec          2.815753   1.230073   2.289   0.0394 *
## vs           -3.121273   2.694013  -1.159   0.2675  
## am            2.186772   2.275734   0.961   0.3541  
## gear          0.795180   1.763481   0.451   0.6595  
## carb          0.337487   0.939505   0.359   0.7252  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 6.169274)
## 
##     Null deviance: 777.650  on 23  degrees of freedom
## Residual deviance:  80.201  on 13  degrees of freedom
## AIC: 121.06
## 
## Number of Fisher Scoring iterations: 2
# For the random forest fit, summary() only lists the components of the
# fitted object (see the output below), not performance statistics
summary(modelolm3)
##                 Length Class      Mode     
## call              4    -none-     call     
## type              1    -none-     character
## predicted        24    -none-     numeric  
## mse             500    -none-     numeric  
## rsq             500    -none-     numeric  
## oob.times        24    -none-     numeric  
## importance       10    -none-     numeric  
## importanceSD      0    -none-     NULL     
## localImportance   0    -none-     NULL     
## proximity         0    -none-     NULL     
## ntree             1    -none-     numeric  
## mtry              1    -none-     numeric  
## forest           11    -none-     list     
## coefs             0    -none-     NULL     
## y                24    -none-     numeric  
## test              0    -none-     NULL     
## inbag             0    -none-     NULL     
## xNames           10    -none-     character
## problemType       1    -none-     character
## tuneValue         1    data.frame list     
## obsLevels         1    -none-     logical
# Tuning the model: help pages for the grid and resampling-control helpers
help("expand.grid")
help("trainControl")

# Resampling scheme: 10-fold cross-validation
controle1 <- trainControl(method = "cv", number = 10)

# Linear model trained under the scheme above, with R-squared as the metric.
# NOTE(review): this fit uses the full mtcars data rather than dados_treino;
# confirm this is intended
modelolm_v2 <- train(
  mpg ~ .,
  data = mtcars,
  method = "lm",
  trControl = controle1,
  metric = "Rsquared"
)

# Model summary for the linear model trained with the cross-validation control
summary(modelolm_v2)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## vs           0.31776    2.10451   0.151   0.8814  
## am           2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07
# Collect the residuals of the linear model
residuals <- resid(modelolm)

# Predictions
?predict
# Called without newdata, so these are predictions for the training rows;
# the plot below is therefore an in-sample check only
predictedValues <- predict(modelolm)
plot(dados_treino$mpg, predictedValues)

# Out-of-sample check: score the held-out rows, which were otherwise never
# used, and report regression performance metrics (RMSE, R-squared) against
# the observed mpg values
previsoes_teste <- predict(modelolm, newdata = dados_teste)
postResample(pred = previsoes_teste, obs = dados_teste$mpg)

# Variable importance: how much each predictor contributed to the fitted
# linear model
help("varImp")
varImp(modelolm)
## lm variable importance
## 
##      Overall
## qsec 100.000
## wt    84.615
## vs    44.165
## drat  42.307
## am    34.402
## cyl   14.303
## gear   9.214
## carb   4.685
## disp   1.986
## hp     0.000
# Plot the variable importance scores of the linear model
plot(varImp(modelolm))