Download and Format Data

A couple of rules that will save you much debugging time:

Column names must not include “_“. Caret will error otherwise.
Run in parallel, it will save you a lot of time
Research the parameters, are they whole numbers, exponentials, logicals, strings? Make sure tuneGrids is modified to suit them.

download.file("http://archive.ics.uci.edu/ml/machine-learning-databases/00319/MHEALTHDATASET.zip", 
    destfile = "mHealth.zip")
unzip("mHealth.zip")
mHealth_subject1 <- read.delim("mHealth/mHealth_subject1.log", header = FALSE)
# read labels
mLabels <- readLines("mHealth/README.txt")[58:81]
# apply labels
mHealth_subject1 %<>% purrr::map2(.x = ., .y = mLabels, .f = function(.x, .y) {
    attr(.x, "label") <- str_extract(.y, "(?<=Column\\s\\d{1,2}\\:\\s).*")
    return(.x)
})
mHealth_subject1 %<>% as.data.frame()
# ----------------------- Sat Nov 10 21:14:08 2018 ------------------------# Add
# levels
Activities <- readLines("mHealth/README.txt")[37:48] %>% str_extract("(?<=\\:\\s).*")
names(Activities) <- readLines("mHealth/README.txt")[37:48] %>% str_extract("L\\d{1,}(?=\\:)")
mTest <- mHealth_subject1[not(mHealth_subject1$V24 == "0"), ]
unique(as.character(sort(mTest$V24)))
mTest$V24 %<>% factor(labels = gsub("^", "L", unique(as.character(sort(mTest$V24)))))
mTest$V24 %>% summary

Create Test and Training Set

mTrain <- mTest[caret::createDataPartition(mTest$V24, times = 1, p = 0.8) %>% .[[1]] %T>% 
    assign("trIndex", ., envir = .GlobalEnv), ]  # Create a training set with equal lengths of each of the response variables categories
mTest <- mTest[trIndex %in% rownames(mTest) %>% not %>% which, ]  # Create a test set that is not those rows

CaretEnsemble Neural Net Parameter Search

#https://youtu.be/aircAruvnKk?t=163 to 4:56 Overview
       rv <- "V24" #enter response variable name
       dv <- NULL  # IF not using all vars, enter Dependent Variables as paste(c("var1","var2"),collapse="+")
       data <- mTrain # Enter Data Variable name here
       mod.packages <- c("caret","doParallel","iterators","parallel","foreach","caretEnsemble","nnet") # [package dependencies]
       startPkgs <- Vectorize(FUN=function(pkg){suppressPackageStartupMessages(library(pkg,character.only = T))}) # Vectorized package init
       invisible(startPkgs(mod.packages)) #suppress console output for packages
       sysNN1.mod <- system.time({  #Time the model creation
       data.train <- caret::createDataPartition(data[[rv]], times = 2, p=.85) # create .85 of each grouping category for training purposes twice
       data.train <- caret::trainControl(method="repeatedcv", # repeated cross validation
                                  index=data.train, # Index is training set
                                  number=5, # number of iterations
                                  repeats=1, # number of repeats
                                  search = "grid", # Move through the combinations of parameters incrementally
                                  allowParallel = T, # allow parallel processing
                                  summaryFunction=caret::defaultSummary, #Use this summary function
                                  classProbs=T, # retain raw probabilities of predicted vlues
                                  savePredictions = "final", # Save final predictions for each iteration
                                  returnResamp = "final", # reurn summary metrics from each iteration
                                  returnData = F) # Do not return original data (recommended with large datasets)
       if(!is.null(dv)){form <- as.formula(paste0(rv," ~ ",paste(dv,collapse="+")))}else {form <- as.formula(paste0(rv," ~ ."))} # create  formula from dependent variables if provided, ortherwise use all
       cl <- makeCluster(detectCores()-1) # Allocate the number of parallel workers (here all but one CPU)
        registerDoParallel(cl) # register these CPUs
        getDoParWorkers()  # Make sure it worked
       nnetmod <- tryCatch(
                 {
             nnetmod <- caretEnsemble::caretList(form = form, #enter model name here
                                                  data = data,
                                                  trControl = data.train,
                                                  metric="Accuracy",
                                                  methodList = c("avNNet","nnet","pcaNNet"), #[method list]
                                                  tuneList = list("avNNet"=caretEnsemble::caretModelSpec( #[method specs]
                                                    method="avNNet", tuneGrid = tuneGrids),"nnet"=caretEnsemble::caretModelSpec(
                                                    method="nnet", tuneGrid = tuneGrids),"pcaNNet"=caretEnsemble::caretModelSpec(
                                                    method="pcaNNet", tuneGrid = tuneGrids)))
                   },
                 error=function(cond) {
                   message("Here's the original error message:")
                   message(cond)
                   gmailr::use_secret_file("googleusercontent.com.json")
                  msg <- gmailr::mime(
                  To = "7818797492@vtext.com",
                  From = "spiritsynaesthetic@gmail.com",
                  Subject = "Notification from R",
                  body = paste("Ensemble Build Error"))
                  gmailr::send_message(msg)
                   return(NA)
                   },
                   warning=function(cond) {
                   message("Here's the original warning message:")
                   message(cond)
                   # Choose a return value in case of warning
                   
                   },
                   finally={
                   # NOTE:
                   # Here goes everything that should be executed at the end,
                   # regardless of success or error.
                   # If you want more than one expression to be executed, then you 
                   # need to wrap them in curly brackets ({...}); otherwise you could
                   # just have written 'finally=<expression>'
                   stopCluster(cl)
                   registerDoSEQ() # Stop the parallel processing return to sequential
                   }
                 )
        
       })
        
       unloadPkgs <- Vectorize(FUN=function(pkg){invisible(detach(paste0("package:",pkg),character.only = T))})
       invisible(unloadPkgs(mod.packages)) # Unload packages
save(nnetmod,file = "nnet.Rdata") # save the file (recommended for long runtimes/large files)
#rm(nnetmod) RM from global environment if large file for smoother running
gmailr::use_secret_file("googleusercontent.com.json") #Send a text message indicating completion
  msg <- gmailr::mime(
    To = "7818797492@vtext.com",
    From = "spiritsynaesthetic@gmail.com",
    Subject = "Notification from R",
    body = paste("NNet 1 Complete"))
gmailr::send_message(msg)

Refine Tuning Parameters for 2nd Run

load("nnet.Rdata")
best.Tunes <- lapply(nnetmod, FUN = function(x) {
    purrr::pluck(x, list("bestTune"))  # Take out the best tuning parameters from each iteration/repetition
})
tuneGrids <- purrr::map2(.x = names(best.Tunes), .y = nnetmod, bT = best.Tunes, function(.x, 
    .y, bT) {
    # Get the tuning parameters
    varnms <- names(bT[[.x]])
    # Sort the results by accuracy
    .y[["results"]] %<>% arrange(Accuracy)
    # Determine the two rows with the best results
    bestRes <- .y[["results"]][{
        nrow(.y[["results"]]) - 1
    }:nrow(.y[["results"]]), varnms]
    whole.num <- lapply(bestRes[, unlist(lapply(bestRes, is.numeric))], sfsmisc::is.whole) %>% 
        lapply(any) %>% unlist %>% which %>% names
    # Create a tuning grid from those values
    Params <- lapply(bestRes, FUN = function(l) {
        if (is.numeric(l)) {
            out <- quantile(l)  # if it's numeric take the quantile
        } else {
            out <- unique(l)  # if non numeric take the unique obs
        }
        return(out)
    }) %>% expand.grid()
    Params[, whole.num] %<>% lapply(round)  # if it needs to be a whole number round it to be such
    return(Params)
})
tuneGrids %<>% split(rep(1:3, 2)) %>% lapply(FUN = function(l) {
    out <- do.call("rbind", l)
    out <- out[not(duplicated(out)), ]
    
})  # Create a tuning grid data frame and remove duplicates
names(tuneGrids) <- names(best.Tunes) %>% gsub("\\.\\d\\d?", "", .) %>% unique
# Remove name artifacts

2nd Run with Refined Tuning Parameters

rv <- "V24" #enter response variable name
       iv <- NULL  # IF not using all vars, enter independent Variables as paste(c("var1","var2"),collapse="+")
       data <- mTrain # Enter Data Variable name here
       mod.packages <- c("caret","doParallel","iterators","parallel","foreach","caretEnsemble","nnet") # [package dependencies]
       startPkgs <- Vectorize(FUN=function(pkg){invisible(suppressPackageStartupMessages(library(pkg,character.only = T)))})
       startPkgs(mod.packages)
        registerDoParallel() # defaults to half of available
       sysNN1.mod <- system.time({
       data.train <- caret::createDataPartition(data[[rv]], times = 2, p=.85)
       data.train <- caret::trainControl(method="repeatedcv",
                                  index=data.train, 
                                  number=5,
                                  repeats=0, # 0 repeats
                                  search = "grid",
                                  allowParallel = T,
                                  summaryFunction=caret::defaultSummary,
                                  classProbs=T, 
                                  savePredictions = "final",
                                  returnResamp = "final",
                                  returnData = F,
                                  verboseIter = T # provide console output
                                  )
       if(!is.null(iv)){form <- as.formula(paste0(rv," ~ ",paste(iv,collapse="+")))}else {form <- as.formula(paste0(rv," ~ ."))}
       
       nnetmod <- caretEnsemble::caretList(form = form, 
                                            data = data,
                                            trControl = data.train,
                                            metric="Accuracy",
                                            methodList = c("avNNet","nnet","pcaNNet"), #[method list]
                                            tuneList = list("avNNet"=caretEnsemble::caretModelSpec( #[method specs] use tunegrids
                                              method="avNNet", tuneGrid = tuneGrids$avNNet),"nnet"=caretEnsemble::caretModelSpec(
                                              method="nnet", tuneGrid = tuneGrids$nnet),"pcaNNet"=caretEnsemble::caretModelSpec(
                                              method="pcaNNet", tuneGrid = tuneGrids$pcaNNet)))
       })
       registerDoSEQ()
       unloadPkgs <- Vectorize(FUN=function(pkg){detach(pkg,character.only = T)})
       unloadPkgs(mod.packages %>% gsub("^","package:",.))
save(nnetmod,sysNN1.mod,file = "nnetfinal.Rdata")
#rm(nnetmod)
gmailr::use_secret_file("googleusercontent.com.json")
  msg <- gmailr::mime(
    To = "7818797492@vtext.com",
    From = "spiritsynaesthetic@gmail.com",
    Subject = "Notification from R",
    body = paste("NNet 1 Complete"))
gmailr::send_message(msg)

Results

load("~/Northeastern/Fall 2018/HINF6400/Incentivized Insurance/cM.Rdata")
cM %>% print

## $positive
## NULL
## 
## $table
##           Reference
## Prediction   L1   L2   L3   L4   L5   L6   L7   L8   L9  L10  L11  L12
##        L1  3072    0    0    0    0    2    0    0    0    0    0    0
##        L2     0    0    0    0    0    0    0    0    0    0    0    0
##        L3     0   83 1438    0    0    0    0    0    0    5    1    0
##        L4     0 2173    0 1005    0   39    2    6    1   12    1    0
##        L5     0    0    0    1    0   50    6   39    2    2    6    0
##        L6     0    0    0    0    0    0    0    0    0    0    0    0
##        L7     0    0    0    5    0   11  716   65    0    0    0    0
##        L8     0    0    0    1    0 2361   87 3261    0    7    1    0
##        L9     0    0    0    0    0    0    0    0  710    4    0    0
##        L10    0    0    0    0    0    0    0    0    0 2653    5    0
##        L11    0    0    0    5    0    0    0    6    0  126  170    0
##        L12    0    0    0    7    0    0    0    2    2  160    5    0
## 
## $overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7111269      0.6682215      0.7045026      0.7176841      0.1844835 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN 
## 
## $byClass
##            Sensitivity Specificity Pos Pred Value Neg Pred Value Precision
## Class: L1    1.0000000   0.9998688      0.9993494      1.0000000 0.9993494
## Class: L2    0.0000000   1.0000000            NaN      0.8768290        NA
## Class: L3    1.0000000   0.9947269      0.9417158      1.0000000 0.9417158
## Class: L4    0.9814453   0.8708073      0.3102810      0.9987398 0.3102810
## Class: L5           NA   0.9942127             NA             NA 0.0000000
## Class: L6    0.0000000   1.0000000            NaN      0.8655274        NA
## Class: L7    0.8828607   0.9953728      0.8983689      0.9945773 0.8983689
## Class: L8    0.9650784   0.8355091      0.5703043      0.9906334 0.5703043
## Class: L9    0.9930070   0.9997727      0.9943978      0.9997159 0.9943978
## Class: L10   0.8935669   0.9996742      0.9981189      0.9798186 0.9981189
## Class: L11   0.8994709   0.9924422      0.5537459      0.9989450 0.5537459
## Class: L12          NA   0.9903909             NA             NA 0.0000000
##               Recall        F1 Prevalence Detection Rate
## Class: L1  1.0000000 0.9996746 0.16772221    0.167722210
## Class: L2  0.0000000        NA 0.12317100    0.000000000
## Class: L3  1.0000000 0.9699831 0.07851059    0.078510592
## Class: L4  0.9814453 0.4714989 0.05590740    0.054870059
## Class: L5         NA        NA 0.00000000    0.000000000
## Class: L6  0.0000000        NA 0.13447259    0.000000000
## Class: L7  0.8828607 0.8905473 0.04427823    0.039091505
## Class: L8  0.9650784 0.7169397 0.18448351    0.178041057
## Class: L9  0.9930070 0.9937019 0.03903691    0.038763922
## Class: L10 0.8935669 0.9429536 0.16209871    0.144846036
## Class: L11 0.8994709 0.6854839 0.01031885    0.009281503
## Class: L12        NA        NA 0.00000000    0.000000000
##            Detection Prevalence Balanced Accuracy
## Class: L1           0.167831404         0.9999344
## Class: L2           0.000000000         0.5000000
## Class: L3           0.083369731         0.9973634
## Class: L4           0.176839921         0.9261263
## Class: L5           0.005787290                NA
## Class: L6           0.000000000         0.5000000
## Class: L7           0.043513868         0.9391167
## Class: L8           0.312186067         0.9002938
## Class: L9           0.038982311         0.9963899
## Class: L10          0.145119022         0.9466205
## Class: L11          0.016761302         0.9459566
## Class: L12          0.009609085                NA
## 
## $mode
## [1] "sens_spec"
## 
## $dots
## list()
## 
## attr(,"class")
## [1] "confusionMatrix"

cM[["table"]] %>% kableExtra::kable("html", booktabs = T) %>% kableExtra::kable_styling(position = "center")

	L1	L2	L3	L4	L6	L7	L8	L9	L10	L11
L1	3072	0	0	0	2	0	0	0	0	0
L2	0	0	0	0	0	0	0	0	0	0
L3	0	83	1438	0	0	0	0	0	5	1
L4	0	2173	0	1005	39	2	6	1	12	1
L5	0	0	0	1	50	6	39	2	2	6
L6	0	0	0	0	0	0	0	0	0	0
L7	0	0	0	5	11	716	65	0	0	0
L8	0	0	0	1	2361	87	3261	0	7	1
L9	0	0	0	0	0	0	0	710	4	0
L10	0	0	0	0	0	0	0	0	2653	5
L11	0	0	0	5	0	0	6	0	126	170
L12	0	0	0	7	0	0	2	2	160	5

cM[["overall"]] %>% kableExtra::kable("html", booktabs = T) %>% kableExtra::kable_styling(position = "center")

	x
Accuracy	0.7111269
Kappa	0.6682215
AccuracyLower	0.7045026
AccuracyUpper	0.7176841
AccuracyNull	0.1844835
AccuracyPValue	0.0000000
McnemarPValue	NaN

NNet Tutorial ReprEx

Stephen Synchronicity

2018-11-27

Download and Format Data

Create Test and Training Set

CaretEnsemble Neural Net Parameter Search

Refine Tuning Parameters for 2nd Run

2nd Run with Refined Tuning Parameters

Results