Download and Format Data

A couple of rules that will save you much debugging time:

  • Column names must not include “_“. Caret will error otherwise.
  • Run in parallel, it will save you a lot of time
  • Research the parameters, are they whole numbers, exponentials, logicals, strings? Make sure tuneGrids is modified to suit them.
download.file("http://archive.ics.uci.edu/ml/machine-learning-databases/00319/MHEALTHDATASET.zip", 
    destfile = "mHealth.zip")
unzip("mHealth.zip")
mHealth_subject1 <- read.delim("mHealth/mHealth_subject1.log", header = FALSE)
# read labels
mLabels <- readLines("mHealth/README.txt")[58:81]
# apply labels
mHealth_subject1 %<>% purrr::map2(.x = ., .y = mLabels, .f = function(.x, .y) {
    attr(.x, "label") <- str_extract(.y, "(?<=Column\\s\\d{1,2}\\:\\s).*")
    return(.x)
})
mHealth_subject1 %<>% as.data.frame()
# ----------------------- Sat Nov 10 21:14:08 2018 ------------------------# Add
# levels
Activities <- readLines("mHealth/README.txt")[37:48] %>% str_extract("(?<=\\:\\s).*")
names(Activities) <- readLines("mHealth/README.txt")[37:48] %>% str_extract("L\\d{1,}(?=\\:)")
mTest <- mHealth_subject1[not(mHealth_subject1$V24 == "0"), ]
unique(as.character(sort(mTest$V24)))
mTest$V24 %<>% factor(labels = gsub("^", "L", unique(as.character(sort(mTest$V24)))))
mTest$V24 %>% summary

Create Test and Training Set

mTrain <- mTest[caret::createDataPartition(mTest$V24, times = 1, p = 0.8) %>% .[[1]] %T>% 
    assign("trIndex", ., envir = .GlobalEnv), ]  # Create a training set with equal lengths of each of the response variables categories
mTest <- mTest[trIndex %in% rownames(mTest) %>% not %>% which, ]  # Create a test set that is not those rows

Refine Tuning Parameters for 2nd Run

load("nnet.Rdata")
best.Tunes <- lapply(nnetmod, FUN = function(x) {
    purrr::pluck(x, list("bestTune"))  # Take out the best tuning parameters from each iteration/repetition
})
tuneGrids <- purrr::map2(.x = names(best.Tunes), .y = nnetmod, bT = best.Tunes, function(.x, 
    .y, bT) {
    # Get the tuning parameters
    varnms <- names(bT[[.x]])
    # Sort the results by accuracy
    .y[["results"]] %<>% arrange(Accuracy)
    # Determine the two rows with the best results
    bestRes <- .y[["results"]][{
        nrow(.y[["results"]]) - 1
    }:nrow(.y[["results"]]), varnms]
    whole.num <- lapply(bestRes[, unlist(lapply(bestRes, is.numeric))], sfsmisc::is.whole) %>% 
        lapply(any) %>% unlist %>% which %>% names
    # Create a tuning grid from those values
    Params <- lapply(bestRes, FUN = function(l) {
        if (is.numeric(l)) {
            out <- quantile(l)  # if it's numeric take the quantile
        } else {
            out <- unique(l)  # if non numeric take the unique obs
        }
        return(out)
    }) %>% expand.grid()
    Params[, whole.num] %<>% lapply(round)  # if it needs to be a whole number round it to be such
    return(Params)
})
tuneGrids %<>% split(rep(1:3, 2)) %>% lapply(FUN = function(l) {
    out <- do.call("rbind", l)
    out <- out[not(duplicated(out)), ]
    
})  # Create a tuning grid data frame and remove duplicates
names(tuneGrids) <- names(best.Tunes) %>% gsub("\\.\\d\\d?", "", .) %>% unique
# Remove name artifacts

2nd Run with Refined Tuning Parameters

rv <- "V24" #enter response variable name
       iv <- NULL  # IF not using all vars, enter independent Variables as paste(c("var1","var2"),collapse="+")
       data <- mTrain # Enter Data Variable name here
       mod.packages <- c("caret","doParallel","iterators","parallel","foreach","caretEnsemble","nnet") # [package dependencies]
       startPkgs <- Vectorize(FUN=function(pkg){invisible(suppressPackageStartupMessages(library(pkg,character.only = T)))})
       startPkgs(mod.packages)
        registerDoParallel() # defaults to half of available
       sysNN1.mod <- system.time({
       data.train <- caret::createDataPartition(data[[rv]], times = 2, p=.85)
       data.train <- caret::trainControl(method="repeatedcv",
                                  index=data.train, 
                                  number=5,
                                  repeats=0, # 0 repeats
                                  search = "grid",
                                  allowParallel = T,
                                  summaryFunction=caret::defaultSummary,
                                  classProbs=T, 
                                  savePredictions = "final",
                                  returnResamp = "final",
                                  returnData = F,
                                  verboseIter = T # provide console output
                                  )
       if(!is.null(iv)){form <- as.formula(paste0(rv," ~ ",paste(iv,collapse="+")))}else {form <- as.formula(paste0(rv," ~ ."))}
       
       nnetmod <- caretEnsemble::caretList(form = form, 
                                            data = data,
                                            trControl = data.train,
                                            metric="Accuracy",
                                            methodList = c("avNNet","nnet","pcaNNet"), #[method list]
                                            tuneList = list("avNNet"=caretEnsemble::caretModelSpec( #[method specs] use tunegrids
                                              method="avNNet", tuneGrid = tuneGrids$avNNet),"nnet"=caretEnsemble::caretModelSpec(
                                              method="nnet", tuneGrid = tuneGrids$nnet),"pcaNNet"=caretEnsemble::caretModelSpec(
                                              method="pcaNNet", tuneGrid = tuneGrids$pcaNNet)))
       })
       registerDoSEQ()
       unloadPkgs <- Vectorize(FUN=function(pkg){detach(pkg,character.only = T)})
       unloadPkgs(mod.packages %>% gsub("^","package:",.))
save(nnetmod,sysNN1.mod,file = "nnetfinal.Rdata")
#rm(nnetmod)
gmailr::use_secret_file("googleusercontent.com.json")
  msg <- gmailr::mime(
    To = "7818797492@vtext.com",
    From = "spiritsynaesthetic@gmail.com",
    Subject = "Notification from R",
    body = paste("NNet 1 Complete"))
gmailr::send_message(msg)

Results

load("~/Northeastern/Fall 2018/HINF6400/Incentivized Insurance/cM.Rdata")
cM %>% print
## $positive
## NULL
## 
## $table
##           Reference
## Prediction   L1   L2   L3   L4   L5   L6   L7   L8   L9  L10  L11  L12
##        L1  3072    0    0    0    0    2    0    0    0    0    0    0
##        L2     0    0    0    0    0    0    0    0    0    0    0    0
##        L3     0   83 1438    0    0    0    0    0    0    5    1    0
##        L4     0 2173    0 1005    0   39    2    6    1   12    1    0
##        L5     0    0    0    1    0   50    6   39    2    2    6    0
##        L6     0    0    0    0    0    0    0    0    0    0    0    0
##        L7     0    0    0    5    0   11  716   65    0    0    0    0
##        L8     0    0    0    1    0 2361   87 3261    0    7    1    0
##        L9     0    0    0    0    0    0    0    0  710    4    0    0
##        L10    0    0    0    0    0    0    0    0    0 2653    5    0
##        L11    0    0    0    5    0    0    0    6    0  126  170    0
##        L12    0    0    0    7    0    0    0    2    2  160    5    0
## 
## $overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.7111269      0.6682215      0.7045026      0.7176841      0.1844835 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN 
## 
## $byClass
##            Sensitivity Specificity Pos Pred Value Neg Pred Value Precision
## Class: L1    1.0000000   0.9998688      0.9993494      1.0000000 0.9993494
## Class: L2    0.0000000   1.0000000            NaN      0.8768290        NA
## Class: L3    1.0000000   0.9947269      0.9417158      1.0000000 0.9417158
## Class: L4    0.9814453   0.8708073      0.3102810      0.9987398 0.3102810
## Class: L5           NA   0.9942127             NA             NA 0.0000000
## Class: L6    0.0000000   1.0000000            NaN      0.8655274        NA
## Class: L7    0.8828607   0.9953728      0.8983689      0.9945773 0.8983689
## Class: L8    0.9650784   0.8355091      0.5703043      0.9906334 0.5703043
## Class: L9    0.9930070   0.9997727      0.9943978      0.9997159 0.9943978
## Class: L10   0.8935669   0.9996742      0.9981189      0.9798186 0.9981189
## Class: L11   0.8994709   0.9924422      0.5537459      0.9989450 0.5537459
## Class: L12          NA   0.9903909             NA             NA 0.0000000
##               Recall        F1 Prevalence Detection Rate
## Class: L1  1.0000000 0.9996746 0.16772221    0.167722210
## Class: L2  0.0000000        NA 0.12317100    0.000000000
## Class: L3  1.0000000 0.9699831 0.07851059    0.078510592
## Class: L4  0.9814453 0.4714989 0.05590740    0.054870059
## Class: L5         NA        NA 0.00000000    0.000000000
## Class: L6  0.0000000        NA 0.13447259    0.000000000
## Class: L7  0.8828607 0.8905473 0.04427823    0.039091505
## Class: L8  0.9650784 0.7169397 0.18448351    0.178041057
## Class: L9  0.9930070 0.9937019 0.03903691    0.038763922
## Class: L10 0.8935669 0.9429536 0.16209871    0.144846036
## Class: L11 0.8994709 0.6854839 0.01031885    0.009281503
## Class: L12        NA        NA 0.00000000    0.000000000
##            Detection Prevalence Balanced Accuracy
## Class: L1           0.167831404         0.9999344
## Class: L2           0.000000000         0.5000000
## Class: L3           0.083369731         0.9973634
## Class: L4           0.176839921         0.9261263
## Class: L5           0.005787290                NA
## Class: L6           0.000000000         0.5000000
## Class: L7           0.043513868         0.9391167
## Class: L8           0.312186067         0.9002938
## Class: L9           0.038982311         0.9963899
## Class: L10          0.145119022         0.9466205
## Class: L11          0.016761302         0.9459566
## Class: L12          0.009609085                NA
## 
## $mode
## [1] "sens_spec"
## 
## $dots
## list()
## 
## attr(,"class")
## [1] "confusionMatrix"
cM[["table"]] %>% kableExtra::kable("html", booktabs = T) %>% kableExtra::kable_styling(position = "center")
L1 L2 L3 L4 L5 L6 L7 L8 L9 L10 L11 L12
L1 3072 0 0 0 0 2 0 0 0 0 0 0
L2 0 0 0 0 0 0 0 0 0 0 0 0
L3 0 83 1438 0 0 0 0 0 0 5 1 0
L4 0 2173 0 1005 0 39 2 6 1 12 1 0
L5 0 0 0 1 0 50 6 39 2 2 6 0
L6 0 0 0 0 0 0 0 0 0 0 0 0
L7 0 0 0 5 0 11 716 65 0 0 0 0
L8 0 0 0 1 0 2361 87 3261 0 7 1 0
L9 0 0 0 0 0 0 0 0 710 4 0 0
L10 0 0 0 0 0 0 0 0 0 2653 5 0
L11 0 0 0 5 0 0 0 6 0 126 170 0
L12 0 0 0 7 0 0 0 2 2 160 5 0
cM[["overall"]] %>% kableExtra::kable("html", booktabs = T) %>% kableExtra::kable_styling(position = "center")
x
Accuracy 0.7111269
Kappa 0.6682215
AccuracyLower 0.7045026
AccuracyUpper 0.7176841
AccuracyNull 0.1844835
AccuracyPValue 0.0000000
McnemarPValue NaN