# Packages used throughout this script
library(foreign)   # read.spss()
library(stringr)   # str_detect()
library(purrr)     # map_int()
library(dplyr)     # select(), filter(), joins, %>%
library(tidyr)     # gather()
library(tibble)    # rownames_to_column()
library(readr)     # write_csv()
library(DT)        # datatable(), formatStyle(), styleInterval()
df <- read.spss("C:\\Users\\Stephen\\Documents\\Veginvesting\\Evan Project\\Parker_Research_Data.sav",
    to.data.frame = TRUE)
df2 <- readxl::read_xlsx("C:\\Users\\Stephen\\Documents\\Veginvesting\\Evan Project\\Food Choice Sustainability Human Health and Morality_All Responses.xlsx")
qKey <- names(df2)
# Replace the anchored Likert labels with their numeric endpoints
df <- as.data.frame(apply(df, 2, function(x) {
    x <- gsub("\\s?Strongly\\sAgree\\s?", 5, x)
    gsub("\\s?Strongly\\sDisagree\\s?", 1, x)
}))
# Convert the question columns to numeric
df[, 5:61] <- apply(df[, 5:61], 2, as.numeric)
# Code NAs toward the central tendency (the neutral answer)
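# A minimal sketch of that recoding (assumed; the replacement step itself is
# not shown in this script) would set missing Likert responses to the neutral
# midpoint of 3:
# df[, 5:61] <- lapply(df[, 5:61], function(x) replace(x, is.na(x), 3))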
row.names(df) <- df$ID
df <- df[, -1]
# ------------------- Thu Feb 01 15:27:00 2018 --------------------#
# Code interest variable as numeric
codeinterest <- function(x) {
    # Map the interest-in-veganism response to its ordinal position (1-5)
    lgl <- vector("logical", length = 5)
    lgl[1] <- str_detect(x, "I have no")
    lgl[2] <- str_detect(x, "I have some")
    lgl[3] <- str_detect(x, "I would like")
    lgl[4] <- str_detect(x, "I am currently")
    lgl[5] <- str_detect(x, "I am already")
    out <- which(lgl)
    return(out)
}
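# e.g. (illustrative input): codeinterest("I have some interest in veganism")
# returns 2L, the position of the matched statement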
df[, 3] <- map_int(df[, 3], codeinterest)
# omit consent
df <- df[, -1]
# dummy code group
codegroup <- function(x) {
lgl <- vector("logical", length = 5)
lgl[1] <- str_detect(x, "Mountain")
lgl[3] <- str_detect(x, "Other")
lgl[5] <- str_detect(x, "Brother")
out <- which(lgl)
return(out)
}
df[["Group"]] <- map_int(df[["Group"]], codegroup)
# Dummy code Race
coderace <- function(x) {
lgl <- vector("logical", length = 5)
lgl[1] <- str_detect(x, "White")
lgl[2] <- str_detect(x, "Other")
lgl[3] <- str_detect(x, "Latino")
lgl[4] <- str_detect(x, "Asian")
lgl[5] <- str_detect(x, "Indian")
out <- which(lgl)
return(out)
}
df[["Race"]] <- map_int(df[["Race"]], coderace)
# Dummy Code Gender
codegen <- function(x) {
lgl <- vector("logical", length = 5)
lgl[1] <- str_detect(x, "Female")
lgl[3] <- is.na(x)
lgl[5] <- str_detect(x, "Male")
out <- which(lgl)
return(out)
}
df[["Gender"]] <- map_int(df[["Gender"]], codegen)
# Dummy code age
codeage <- function(x) {
    # Map the age bracket to its ordinal position (1-6)
    lgl <- vector("logical", length = 6)
    lgl[1] <- str_detect(x, "18-24")
    lgl[2] <- str_detect(x, "25-34")
    lgl[3] <- str_detect(x, "35-44")
    lgl[4] <- str_detect(x, "45-54")
    lgl[5] <- str_detect(x, "55-64")
    lgl[6] <- str_detect(x, fixed("65+"))  # fixed(): "+" is a regex metacharacter
    out <- which(lgl)
    return(out)
}
df[["Age_Cohort"]] <- map_int(df[["Age_Cohort"]], codeage)
library(scales)
# Linearly rescale the six age codes onto a 1-5 range
df[["Age_Cohort"]] <- rescale(df[["Age_Cohort"]], to = c(1, 5))
df <- as.data.frame(df)
qKey <- data.frame(Short = names(df), Long = names(df2[-c(1:10)])[1:63], stringsAsFactors = F)
write_csv(df, "EPData.csv")
df <- read.csv("EPData.csv", stringsAsFactors = T)
BarH <- c("There are not enough vitamins and minerals in vegan diets", "I wouldn’t (or don’t) get enough energy or strength from vegan food",
"There is not enough protein in vegan diets", "I would be (or am) worried about my health")
BarP <- c("I like eating meat", "I think humans are meant to eat animal products (meat, fish, dairy, or eggs)",
"Vegan diets are boring", "Vegan diets are not filling enough", "I like dairy products and/or eggs")
BarC <- c("It is inconvenient", "It takes too long to prepare vegan food", "Vegan options are not available where I shop or at my home",
"I don’t have enough willpower")
BarK <- c("I don’t know what to eat instead of animal products", "I lack the right cooking skills",
"I need more information about vegan diets")
BarN <- c("My friends eat animal products", "I don’t want people to stereotype me negatively (e.g. that I must be strange)",
"My family eats animal products", "My family/spouse/partner won’t eat vegan food")
# Match each scale item to its row in qKey by the first 20 characters of the
# question text
BarC.key <- qKey[c(apply(sapply(substr(BarC, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
# BarH items contain regex metacharacters (parentheses), so index directly
BarH.key <- qKey[c(25:28), ]
BarK.key <- qKey[c(apply(sapply(substr(BarK, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
BarN.key <- qKey[c(apply(sapply(substr(BarN, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
BarP.key <- qKey[c(apply(sapply(substr(BarP, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
BenME <- c("Reduce greenhouse gas emissions and contributions to climate change",
"Help eliminate animal exploitation", "Help animal rights", "Reduce deforestation and land-use change",
"Help animal welfare", "Reduce water pollution", "Decrease world hunger", "Increase the efficiency of food production",
"Create a more peaceful world")
BenME.key <- qKey[qKey$Long %in% BenME, ]
BenH <- c("Be fit", "Have a better quality of life", "Have plenty of energy", "Increase my control over my own health",
"Live longer", "Stay healthy", "Prevent disease in general (e.g. heart disease, cancer)",
"Eat more fruit and vegetables", "Be healthier by decreasing my intake of chemicals, steroids and antibiotics which are found in animal products",
"Lower my chances of getting food poisoning", "Decrease saturated fat intake in my diet")
BenH.key <- qKey[qKey$Long %in% BenH, ]
# ----------------------- Sun Mar 04 17:27:45 2018 ------------------------#
# Amelia assumes the variables are jointly (multivariate) normal and imputes
# missing values in a way that preserves the covariance structure. See
# http://gking.harvard.edu/amelia for details.
library(Amelia)
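# Before imputing, it can help to visualize the missingness pattern; a quick
# sketch (not part of the original run):
# Amelia::missmap(Barriers)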
levels(Barriers$Diet)
## [1] "Flexitarian" "Omnivore" "Pescetarian" "Vegan" "Vegetarian"
is.ordered(Barriers$Diet)
## [1] FALSE
# ----------------------- Sun Mar 04 17:03:34 2018 ------------------------#
# Make Diet an ordinal variable
Barriers$Diet <- factor(Barriers$Diet, levels = c("Omnivore", "Flexitarian", "Pescetarian",
"Vegetarian", "Vegan"), ordered = T)
Benefits$Diet <- factor(Benefits$Diet, levels = c("Omnivore", "Flexitarian", "Pescetarian",
"Vegetarian", "Vegan"), ordered = T)
nmBarriers <- names(Barriers)
(Categorical <- nmBarriers[c(1, 2, 29:36)])
## [1] "Diet" "Interest_Veganism" "Environmentalist"
## [4] "Feminist" "SJA" "Animal_Lover"
## [7] "Group" "Race" "Gender"
## [10] "Age_Cohort"
numBarriers <- Barriers %>% select(-one_of(Categorical))
Barriers.imputed <- Amelia::amelia(Barriers, m = 1, p2s = 2, idvars = Categorical)
##
## amelia starting
## beginning prep functions
## Variables used: Question4 Question5 Question6 Question7 Question8 Question9 Question10 Question11 Question12 Question13 Question14 Question15 Question16 Question17 Question18 Question19 Question20 Question21 Question22 Question23 Question24 Question25 Question26 Question27 Question28 Question29
## running bootstrap
## -- Imputation 1 --
## setting up EM chain indicies
##
## 1(377) 2(208) 3(192) 4(124) 5(98) 6(97) 7(93) 8(83) 9(71) 10(57) 11(46) 12(32) 13(15) 14(7) 15(1) 16(0)
##
## saving and cleaning
## [1] 22
# Save the matrix with complete data
iBarriers <- Barriers.imputed[["imputations"]][["imp1"]]
# ----------------------- Sun Mar 04 17:25:26 2018 ------------------------#
# Impute values in Benefits dataset
Benefits.imputed <- Amelia::amelia(Benefits, m = 1, p2s = 2, idvars = Categorical)
##
## amelia starting
## beginning prep functions
## Variables used: Question30 Question31 Question32 Question33 Question34 Question35 Question36 Question37 Question38 Question39 Question40 Question41 Question42 Question43 Question44 Question45 Question46 Question47 Question48 Question49 Question50 Question51 Question52 Question53 Question54 Question55 Question56
## running bootstrap
## -- Imputation 1 --
## setting up EM chain indicies
##
## 1(405) 2(311) 3(67) 4(55) 5(40) 6(11) 7(0)
##
## saving and cleaning
## [1] 31
The original factor analysis was run blind to any explanation of the data:
colnames(dfNum) <- c("Id", qKey$Short[13:69])
psych::fa.parallel(dfNum[, -c(1)], fa = "pc", main = "# of Factors", n.iter = 10,
use = "pairwise", quant = 0.95)
fadfNum <- psych::fa(dfNum[, -c(1)], nfactors = 4, n.iter = 10, residuals = T, alpha = 0.05,
fm = "ml")
# Top and bottom five loadings for each factor
topBottom <- function(loadings, i, n = 5) {
    o <- loadings[, i][order(loadings[, i], decreasing = T)]
    data.frame(Lvl = c(rep("H", n), rep("T", n)),
        Q = c(names(head(o, n)), names(tail(o, n))), stringsAsFactors = F)
}
dsNumF1 <- topBottom(fadfNum$loadings, 1)
dsNumF2 <- topBottom(fadfNum$loadings, 2)
dsNumF3 <- topBottom(fadfNum$loadings, 3)
dsNumF4 <- topBottom(fadfNum$loadings, 4)
DT::datatable(dsNumF1)
DT::datatable(dsNumF2)
DT::datatable(dsNumF3)
DT::datatable(dsNumF4)
## Parallel analysis suggests that the number of factors = 5 and the number of components = 5
# It looks like the parallel test suggests 5 factors/components, excellent.
# Model evaluation: to choose a factoring method 'fm', we run the analysis
# with each method and compare the R^2 values to determine which model best
# explains the data.
nfac <- 5
fm <- c("minres", "pa", "wls", "gls", "ml")
faR2Mat <- matrix(NA, nrow = length(fm), ncol = nfac)
for (i in seq_along(fm)) {
Result <- psych::fa(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = fm[i], warnings = T)
faR2Mat[i, ] <- Result[["R2.scores"]]
}
# Average the rows to determine which method explains the data best
rowMeans(faR2Mat)
## [1] 0.8373863 0.8371888 0.8419728 0.8474469 0.8381363
# Additional trials show stable R^2 values. The 'generalized weighted least
# squares' factoring method appears to explain the data best, so we will use
# this model for further exploration.
faBestModel <- psych::fa(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = "gls", warnings = T)
psych::alpha(iBarriers %>% select(-one_of(Categorical)))
##
## Reliability analysis
## Call: psych::alpha(x = iBarriers %>% select(-one_of(Categorical)))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.84 0.84 0.91 0.17 5.3 0.023 2.5 0.55
##
## lower alpha upper 95% confidence boundaries
## 0.8 0.84 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Question4 0.84 0.83 0.90 0.17 5.0 0.025
## Question5 0.84 0.84 0.91 0.17 5.1 0.024
## Question6 0.84 0.84 0.91 0.17 5.3 0.023
## Question7 0.84 0.84 0.91 0.18 5.4 0.023
## Question8 0.84 0.84 0.91 0.17 5.1 0.024
## Question9 0.84 0.84 0.91 0.17 5.2 0.024
## Question10 0.84 0.84 0.91 0.18 5.4 0.023
## Question11 0.84 0.84 0.91 0.17 5.2 0.024
## Question12 0.84 0.84 0.91 0.18 5.3 0.024
## Question13 0.83 0.83 0.90 0.16 4.9 0.025
## Question14 0.83 0.83 0.90 0.16 4.8 0.026
## Question15 0.84 0.84 0.91 0.17 5.2 0.024
## Question16 0.84 0.84 0.91 0.18 5.3 0.023
## Question17 0.84 0.83 0.91 0.17 5.0 0.024
## Question18 0.84 0.84 0.91 0.18 5.3 0.023
## Question19 0.83 0.83 0.91 0.16 4.9 0.025
## Question20 0.84 0.83 0.91 0.17 5.0 0.024
## Question21 0.83 0.83 0.91 0.16 4.9 0.025
## Question22 0.84 0.84 0.91 0.17 5.2 0.024
## Question23 0.84 0.83 0.91 0.17 5.0 0.025
## Question24 0.84 0.84 0.91 0.17 5.1 0.024
## Question25 0.84 0.84 0.91 0.17 5.1 0.024
## Question26 0.83 0.83 0.91 0.17 4.9 0.025
## Question27 0.83 0.83 0.91 0.16 4.8 0.025
## Question28 0.84 0.84 0.91 0.17 5.1 0.024
## Question29 0.83 0.83 0.91 0.16 4.9 0.025
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Question4 90 0.54 0.52 0.51 0.46 3.5 1.52
## Question5 90 0.46 0.42 0.41 0.36 3.1 1.58
## Question6 90 0.32 0.32 0.28 0.23 2.6 1.39
## Question7 90 0.21 0.26 0.22 0.17 1.3 0.66
## Question8 90 0.43 0.44 0.41 0.35 2.3 1.37
## Question9 90 0.34 0.34 0.32 0.27 1.6 1.05
## Question10 90 0.17 0.22 0.17 0.13 1.2 0.49
## Question11 90 0.40 0.38 0.37 0.31 3.4 1.41
## Question12 90 0.27 0.27 0.22 0.21 4.4 0.88
## Question13 90 0.62 0.62 0.62 0.57 2.3 1.10
## Question14 90 0.67 0.67 0.68 0.63 2.4 1.20
## Question15 90 0.42 0.41 0.38 0.34 3.1 1.36
## Question16 90 0.30 0.30 0.27 0.23 2.9 1.15
## Question17 90 0.51 0.51 0.50 0.44 3.5 1.19
## Question18 90 0.28 0.29 0.25 0.20 2.2 1.17
## Question19 90 0.59 0.60 0.59 0.54 2.4 1.07
## Question20 90 0.49 0.49 0.46 0.42 2.6 1.36
## Question21 90 0.58 0.58 0.57 0.52 1.9 1.13
## Question22 90 0.38 0.38 0.35 0.30 3.2 1.21
## Question23 90 0.53 0.53 0.51 0.46 2.5 1.35
## Question24 90 0.44 0.45 0.43 0.36 2.0 1.19
## Question25 90 0.46 0.47 0.45 0.39 2.1 1.25
## Question26 90 0.56 0.56 0.56 0.50 2.2 1.14
## Question27 90 0.65 0.64 0.64 0.59 2.5 1.34
## Question28 90 0.45 0.44 0.41 0.36 2.6 1.38
## Question29 90 0.58 0.58 0.58 0.52 2.5 1.21
# ----------------------- Sun Mar 04 17:47:26 2018 ------------------------#
# TODO(Examine the factors, look at H/T to determine question correlates and see
# if the questions with high weights correspond to Evan's. If not, what do the
# patterns in this analysis suggest about other potential components to explain
# the data?)
Barriers.Qs <- names(df2)[11:38]
loadings2pval <- function(loadings) {
    # Input: loadings from a PCA or FA. Output: labelled two-sided "p-values",
    # computed by z-scoring each column of loadings and taking the normal tail
    # probability. This flags unusually large loadings; it is a heuristic, not
    # a formal significance test.
    nc <- ncol(loadings)
    if (is.null(nc)) {
        nc <- 1
    }
    if (nc > 1) {
        pvals <- loadings %>% unclass %>% apply(2, FUN = scale)
    } else {
        pvals <- loadings %>% unclass %>% scale
    }
    for (i in 1:nc) {
        # After scaling, each column has mean ~0 and sd ~1
        m <- mean(pvals[, i])
        s <- sd(pvals[, i])
        for (r in seq_along(pvals[, i])) {
            pvals[r, i] <- 2 * pnorm(-abs(pvals[r, i]), mean = m, sd = s)
        }
    }
    rownames(pvals) <- rownames(loadings)
    return(pvals)
}
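# Toy check of the helper (illustrative only):
# loadings2pval(matrix(c(0.9, 0.5, 0.1, -0.7), ncol = 1))
# Loadings far from the column mean receive small "p-values".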
pvals.fa <- loadings2pval(faBestModel$loadings)
loadTotals <- as.data.frame(cbind(faBestModel[["Structure"]],
    Sum = rowSums(abs(faBestModel[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
faLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- faBestModel[["loadings"]] %>% unclass %>% .[, paste("GLS", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
faLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.fa[oQues,
paste("GLS", i, sep = "")]) %>% inner_join(loadProps)
}
# ----------------------- Mon Mar 05 07:38:49 2018 ------------------------#
# ML columns provide the proportion of load shared between variables
DT::datatable(faLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(faLoadings[[2]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(faLoadings[[3]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
## Parallel analysis suggests that the number of factors = 3 and the number of components = 2
# The parallel test suggests 3 factors; we will try 2 and 3 to compare with
# Evan's. Model evaluation: to choose a factoring method 'fm', we run the
# analysis with each method and compare the R^2 values to determine which
# model best explains the data.
nfac <- 3
fm <- c("minres", "pa", "wls", "gls", "ml")
faBenR2Mat <- matrix(NA, nrow = length(fm), ncol = nfac)
for (i in seq_along(fm)) {
Result <- psych::fa(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = fm[i], warnings = T)
faBenR2Mat[i, ] <- Result[["R2.scores"]]
}
# Sum the rows to determine which explains the data the best
rowSums(faBenR2Mat)
## [1] 2.781406 2.781063 2.802299 2.857590 2.875187
# Additional trials show stable R^2 values. The 'maximum likelihood' factoring
# method appears to explain the data best, so we will use this model for
# further exploration.
faBenBestModel <- psych::fa(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = "ml", warnings = T)
psych::alpha(iBenefits %>% select(-one_of(Categorical)))
##
## Reliability analysis
## Call: psych::alpha(x = iBenefits %>% select(-one_of(Categorical)))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.96 0.96 0.98 0.48 24 0.0056 3.5 0.87
##
## lower alpha upper 95% confidence boundaries
## 0.95 0.96 0.97
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Question30 0.96 0.96 0.98 0.47 23 0.0058
## Question31 0.96 0.96 0.98 0.47 23 0.0058
## Question32 0.96 0.96 0.98 0.47 23 0.0058
## Question33 0.96 0.96 0.98 0.47 23 0.0058
## Question34 0.96 0.96 0.98 0.47 23 0.0058
## Question35 0.96 0.96 0.98 0.48 24 0.0056
## Question36 0.96 0.96 0.98 0.48 24 0.0057
## Question37 0.96 0.96 0.98 0.47 23 0.0059
## Question38 0.96 0.96 0.98 0.47 23 0.0060
## Question39 0.96 0.96 0.98 0.48 24 0.0056
## Question40 0.96 0.96 0.98 0.47 24 0.0058
## Question41 0.96 0.96 0.98 0.47 23 0.0058
## Question42 0.96 0.96 0.98 0.47 23 0.0060
## Question43 0.96 0.96 0.98 0.48 24 0.0057
## Question44 0.96 0.96 0.98 0.48 24 0.0058
## Question45 0.96 0.96 0.98 0.48 24 0.0057
## Question46 0.96 0.96 0.98 0.47 23 0.0060
## Question47 0.96 0.96 0.98 0.48 24 0.0056
## Question48 0.96 0.96 0.98 0.48 24 0.0057
## Question49 0.96 0.96 0.98 0.47 23 0.0058
## Question50 0.96 0.96 0.98 0.48 24 0.0057
## Question51 0.96 0.96 0.98 0.48 24 0.0057
## Question52 0.96 0.96 0.98 0.48 24 0.0058
## Question53 0.96 0.96 0.98 0.48 24 0.0057
## Question54 0.96 0.96 0.98 0.48 24 0.0058
## Question55 0.96 0.96 0.98 0.47 23 0.0058
## Question56 0.96 0.96 0.98 0.49 25 0.0055
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Question30 106 0.73 0.74 0.74 0.70 3.8 1.16
## Question31 106 0.71 0.72 0.72 0.68 3.7 1.18
## Question32 106 0.75 0.76 0.76 0.73 3.7 1.15
## Question33 106 0.74 0.75 0.74 0.72 3.9 1.17
## Question34 106 0.72 0.72 0.70 0.69 3.1 1.36
## Question35 106 0.55 0.56 0.54 0.52 4.4 0.92
## Question36 106 0.57 0.59 0.57 0.55 4.2 0.97
## Question37 106 0.81 0.82 0.82 0.80 3.5 1.12
## Question38 106 0.85 0.86 0.86 0.84 3.3 1.29
## Question39 106 0.61 0.62 0.61 0.57 3.5 1.24
## Question40 106 0.71 0.71 0.71 0.68 3.2 1.29
## Question41 106 0.77 0.78 0.78 0.75 3.3 1.20
## Question42 106 0.84 0.84 0.84 0.82 3.2 1.42
## Question43 106 0.69 0.69 0.69 0.66 4.1 1.22
## Question44 106 0.70 0.70 0.70 0.68 4.1 1.22
## Question45 106 0.70 0.70 0.70 0.67 4.0 1.29
## Question46 106 0.82 0.81 0.81 0.80 3.3 1.43
## Question47 106 0.60 0.59 0.57 0.56 2.5 1.55
## Question48 106 0.67 0.66 0.65 0.64 2.2 1.23
## Question49 106 0.75 0.74 0.73 0.72 3.2 1.31
## Question50 106 0.67 0.66 0.66 0.64 3.8 1.27
## Question51 106 0.70 0.69 0.69 0.67 3.7 1.29
## Question52 106 0.72 0.71 0.71 0.69 3.8 1.22
## Question53 106 0.63 0.63 0.61 0.60 3.7 1.16
## Question54 106 0.71 0.71 0.70 0.68 3.3 1.24
## Question55 106 0.74 0.74 0.74 0.72 2.8 1.24
## Question56 106 0.51 0.51 0.49 0.47 2.9 1.27
# ----------------------- Mon Mar 05 08:52:05 2018 ------------------------#
# Calculate benefits
pvals.fa <- loadings2pval(faBenBestModel$loadings)
loadTotals <- as.data.frame(cbind(faBenBestModel[["Structure"]],
    Sum = rowSums(abs(faBenBestModel[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
faBenLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- faBenBestModel[["loadings"]] %>% unclass %>% .[, paste("ML", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
faBenLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.fa[oQues,
paste("ML", i, sep = "")]) %>% inner_join(loadProps)
}
DT::datatable(faBenLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
rotations <- c("none", "varimax", "quartimax", "promax", "oblimin", "simplimax",
"cluster")
nfac <- 5
rms <- vector()
for (i in seq_along(rotations)) {
Result <- psych::pca(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = rotations[i])
rms[i] <- Result[["rms"]]
}
# ----------------------- Mon Mar 05 10:39:55 2018 ------------------------#
# All rotations return the same root mean squared error, so 'none' will be
# used for the rotation parameter
pcaBar <- psych::pca(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = "none")
pvals.pca <- loadings2pval(pcaBar$loadings)
loadTotals <- as.data.frame(cbind(pcaBar[["Structure"]],
    Sum = rowSums(abs(pcaBar[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
pcaBarLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- pcaBar[["loadings"]] %>% unclass %>% .[, paste("PC", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
pcaBarLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.pca[oQues,
paste("PC", i, sep = "")]) %>% inner_join(loadProps)
}
DT::datatable(pcaBarLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(pcaBarLoadings[[2]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(pcaBarLoadings[[3]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
rotations <- c("none", "varimax", "quartimax", "promax", "oblimin", "simplimax",
"cluster")
rms <- vector()
nfac <- 2
for (i in seq_along(rotations)) {
Result <- psych::pca(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = rotations[i])
rms[i] <- Result[["rms"]]
}
# ----------------------- Mon Mar 05 10:39:55 2018 ------------------------#
# All rotations return the same root mean squared error, so 'none' will be
# used for the rotation parameter
pcaBen <- psych::pca(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = "none")
pvals.pca <- loadings2pval(pcaBen$loadings)
loadTotals <- as.data.frame(cbind(pcaBen[["Structure"]],
    Sum = rowSums(abs(pcaBen[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
pcaBenLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- pcaBen[["loadings"]] %>% unclass %>% .[, paste("PC", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
pcaBenLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.pca[oQues,
paste("PC", i, sep = "")]) %>% inner_join(loadProps)
}
DT::datatable(pcaBenLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
library(caret)
# createFolds expects the outcome vector, so fold on the Diet column
trControl <- caret::trainControl(method = "repeatedcv", number = 4,
    index = caret::createFolds(Barriers$Diet, k = 10), repeats = 4, p = 0.75,
    allowParallel = T)
library(doParallel)
# make a PSOCK cluster, leaving two cores free (six workers on this machine)
cl <- makeCluster(detectCores() - 2, type = "PSOCK")
# register the number of parallel workers
registerDoParallel(cl)
# return number of parallel workers
getDoParWorkers()
## [1] 6
iBarriers.v <- iBarriers %>% filter(Diet != "Vegan")
iBarriers.v$Diet <- factor(iBarriers.v$Diet)  # drop the now-empty Vegan level
best.mlm <- caret::train(Diet ~ . - Interest_Veganism, data = iBarriers.v, method = "multinom",
trControl = trControl)
## # weights: 144 (105 variable)
## initial value 124.766493
## iter 10 value 47.740691
## iter 20 value 27.363454
## iter 30 value 21.431328
## iter 40 value 20.791093
## iter 50 value 20.548323
## iter 60 value 20.481806
## iter 70 value 20.476857
## final value 20.476767
## converged
stopCluster(cl)
# ----------------------- Tue Mar 06 11:41:56 2018 ------------------------#
# Test accuracy
caret::confusionMatrix(predict(best.mlm), iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 44 0 0 0
## Flexitarian 0 24 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9598, 1)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4889 0.2667 0.1333
## Detection Prevalence 0.4889 0.2667 0.1333
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
# 100% accurate. The model is likely overfitted to the training data, but it
# will still yield interesting insight into the factors that predict
# preference.
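# A less optimistic check (a sketch, assuming the caret object above): inspect
# the resampled accuracy rather than the training-set confusion matrix.
# best.mlm$results %>% arrange(desc(Accuracy)) %>% head()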
# ----------------------- Tue Mar 06 08:45:06 2018 ------------------------#
# Display coefficients on both models
best.mlm.coef <- coef(best.mlm$finalModel) %>% as.matrix()
best.mlm.coef <- as.data.frame(best.mlm.coef, stringsAsFactors = F) %>%
    rownames_to_column("Feat") %>% gather(key = "rowname", value = "Coef",
    -Feat) %>% mutate(pVal = as.vector(loadings2pval(Coef))) %>% arrange(pVal) %>%
    inner_join(qKey, by = c(rowname = "Short"))
DT::datatable(best.mlm.coef) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
## # weights: 148 (108 variable)
## initial value 124.766493
## iter 10 value 44.963112
## iter 20 value 14.042768
## iter 30 value 0.411924
## iter 40 value 0.002488
## final value 0.000081
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question5
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question12
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Gender
## trying - Age_Cohort
## # weights: 144 (105 variable)
## initial value 124.766493
## iter 10 value 46.188223
## iter 20 value 19.490316
## iter 30 value 1.564995
## iter 40 value 0.005630
## final value 0.000050
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question12
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Gender
## trying - Age_Cohort
## trying + Question5
## # weights: 140 (102 variable)
## initial value 124.766493
## iter 10 value 46.450451
## iter 20 value 16.214609
## iter 30 value 0.547619
## iter 40 value 0.006369
## final value 0.000051
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question12
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Gender
## # weights: 136 (99 variable)
## initial value 124.766493
## iter 10 value 47.663897
## iter 20 value 19.909807
## iter 30 value 1.284903
## iter 40 value 0.015059
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question12
## trying + Gender
## # weights: 132 (96 variable)
## initial value 124.766493
## iter 10 value 48.201173
## iter 20 value 21.442639
## iter 30 value 3.347825
## iter 40 value 0.037137
## iter 50 value 0.001299
## final value 0.000051
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question10
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question12
## trying + Gender
## # weights: 128 (93 variable)
## initial value 124.766493
## iter 10 value 41.753508
## iter 20 value 20.435798
## iter 30 value 5.015973
## iter 40 value 0.061489
## iter 50 value 0.000103
## iter 50 value 0.000053
## iter 50 value 0.000053
## final value 0.000053
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question10
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question12
## trying + SJA
## trying + Gender
## # weights: 124 (90 variable)
## initial value 124.766493
## iter 10 value 41.668516
## iter 20 value 21.060753
## iter 30 value 6.647382
## iter 40 value 0.128429
## iter 50 value 0.000198
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + SJA
## trying + Gender
## # weights: 120 (87 variable)
## initial value 124.766493
## iter 10 value 43.195177
## iter 20 value 23.408919
## iter 30 value 11.047688
## iter 40 value 0.897889
## iter 50 value 0.001465
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + SJA
## trying + Gender
## # weights: 116 (84 variable)
## initial value 124.766493
## iter 10 value 44.014714
## iter 20 value 28.145918
## iter 30 value 14.241176
## iter 40 value 3.262813
## iter 50 value 0.004956
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question23
## trying + SJA
## trying + Gender
## # weights: 112 (81 variable)
## initial value 124.766493
## iter 10 value 45.247957
## iter 20 value 24.673261
## iter 30 value 11.290692
## iter 40 value 0.327068
## iter 50 value 0.000602
## final value 0.000054
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question23
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 108 (78 variable)
## initial value 124.766493
## iter 10 value 46.608951
## iter 20 value 25.923687
## iter 30 value 13.895202
## iter 40 value 0.711476
## iter 50 value 0.002131
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question23
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 104 (75 variable)
## initial value 124.766493
## iter 10 value 47.731850
## iter 20 value 27.655145
## iter 30 value 15.359877
## iter 40 value 1.671001
## iter 50 value 0.003140
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 100 (72 variable)
## initial value 124.766493
## iter 10 value 49.616446
## iter 20 value 29.557102
## iter 30 value 16.241720
## iter 40 value 7.972317
## iter 50 value 0.099680
## iter 60 value 0.000101
## iter 60 value 0.000051
## iter 60 value 0.000051
## final value 0.000051
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 96 (69 variable)
## initial value 124.766493
## iter 10 value 49.977540
## iter 20 value 31.940686
## iter 30 value 17.790865
## iter 40 value 9.854907
## iter 50 value 3.111797
## iter 60 value 0.002994
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 92 (66 variable)
## initial value 124.766493
## iter 10 value 52.189172
## iter 20 value 34.274533
## iter 30 value 20.520691
## iter 40 value 10.193880
## iter 50 value 0.362358
## iter 60 value 0.001199
## final value 0.000054
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question16
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 88 (63 variable)
## initial value 124.766493
## iter 10 value 56.558441
## iter 20 value 34.978416
## iter 30 value 23.614229
## iter 40 value 13.036827
## iter 50 value 6.533934
## iter 60 value 0.042223
## final value 0.000053
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question16
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 84 (60 variable)
## initial value 124.766493
## iter 10 value 58.509776
## iter 20 value 34.602615
## iter 30 value 21.377537
## iter 40 value 14.179416
## iter 50 value 10.198975
## iter 60 value 1.358393
## iter 70 value 0.001723
## final value 0.000055
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question16
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## trying + Age_Cohort
## # weights: 80 (57 variable)
## initial value 124.766493
## iter 10 value 47.792443
## iter 20 value 34.392573
## iter 30 value 22.720912
## iter 40 value 15.345937
## iter 50 value 11.160363
## iter 60 value 9.751084
## iter 70 value 8.681686
## iter 80 value 0.435872
## iter 90 value 0.000449
## final value 0.000057
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question16
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + Question29
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## trying + Age_Cohort
## Call:
## multinom(formula = Diet ~ Interest_Veganism + Question4 + Question6 +
## Question7 + Question8 + Question11 + Question15 + Question19 +
## Question20 + Question21 + Question25 + Question26 + Question27 +
## Question28 + Environmentalist + Feminist + Group + Race,
## data = iBarriers.v)
##
## Coefficients:
## (Intercept) Interest_Veganism Question4 Question6 Question7
## Flexitarian 13060.2024 6289.771 1443.4186 -2852.8008 3237.1035
## Pescetarian 434.8581 2381.473 681.0775 -5771.9469 10085.1758
## Vegetarian 46979.4959 3462.682 -418.2598 -520.6968 186.5502
## Question8 Question11 Question15 Question19 Question20
## Flexitarian -1193.0348 -11106.79 2845.401 -1502.903 -1908.452
## Pescetarian -1682.6864 -23510.77 2187.259 4728.274 4988.434
## Vegetarian -895.8328 -18712.46 -1537.910 -4664.936 3991.689
## Question21 Question25 Question26 Question27 Question28
## Flexitarian 1529.052 -3508.986 913.8195 -586.3496 -2080.202
## Pescetarian 2256.248 -10978.140 -2556.0877 5574.8685 -1242.906
## Vegetarian 1988.132 -4558.468 -7656.9615 7597.3174 -2564.746
## Environmentalist Feminist Group Race
## Flexitarian 4232.887 882.14868 -166.9211 3666.805
## Pescetarian 15235.348 -6948.87762 225.7232 8506.220
## Vegetarian 3429.519 27.00841 1449.7197 -264.037
##
## Std. Errors:
## (Intercept) Interest_Veganism Question4 Question6
## Flexitarian 2.505995e+03 1.051699e+03 3.022355e+03 6.141512e+03
## Pescetarian 1.807246e-09 1.807246e-09 1.807246e-09 1.807246e-09
## Vegetarian 2.494757e-139 4.989515e-139 7.484272e-139 4.989515e-139
## Question7 Question8 Question11 Question15
## Flexitarian 7.947033e+03 4.744712e+03 3.033184e+03 1.462425e+03
## Pescetarian 1.807246e-09 5.421739e-09 5.421739e-09 9.036232e-09
## Vegetarian 7.484272e-139 9.979029e-139 4.989515e-139 4.989515e-139
## Question19 Question20 Question21 Question25
## Flexitarian 1.292479e+04 5.015072e+03 3.331534e+03 1.356068e+03
## Pescetarian 1.807246e-09 1.807246e-09 3.614493e-09 3.614493e-09
## Vegetarian 7.484272e-139 1.247379e-138 2.494757e-139 1.247379e-138
## Question26 Question27 Question28 Environmentalist
## Flexitarian 5.257355e+03 5.127986e+03 3.099804e+03 2.338078e+03
## Pescetarian 1.807246e-09 1.807246e-09 1.380529e-09 9.036232e-09
## Vegetarian 2.494757e-139 4.989515e-139 4.989515e-139 1.247379e-138
## Feminist Group Race
## Flexitarian 3.722167e+03 1.913628e+03 3.613079e+03
## Pescetarian 1.807246e-09 1.807246e-09 1.807246e-09
## Vegetarian 1.247379e-138 1.247379e-138 2.494757e-139
##
## Residual Deviance: 0.0001148672
## AIC: 114.0001
best.mlm.step <- nnet::multinom(formula = Diet ~ Question5 + Question7 + Question8 +
    Question9 + Question11 + Question13 + Question16 + Question18 + Question21 +
    Question22 + Question24 + Question25 + Question28 + Question29 + Environmentalist +
    Animal_Lover + Group + Race + Gender + Age_Cohort, data = iBarriers.v)
## # weights: 88 (63 variable)
## initial value 124.766493
## iter 10 value 53.573126
## iter 20 value 32.864539
## iter 30 value 22.164009
## iter 40 value 14.860347
## iter 50 value 13.069672
## iter 60 value 12.092106
## iter 70 value 10.805683
## iter 80 value 9.979608
## iter 90 value 9.674509
## iter 100 value 9.635162
## final value 9.635162
## stopped after 100 iterations
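# The confusion matrix below lacks its generating call; presumably (assumed):
# caret::confusionMatrix(predict(best.mlm.step, iBarriers.v), iBarriers.v$Diet)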
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 42 1 0 0
## Flexitarian 2 23 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9057, 0.9931)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.9497
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9545 0.9583 1.0000
## Specificity 0.9783 0.9697 1.0000
## Pos Pred Value 0.9767 0.9200 1.0000
## Neg Pred Value 0.9574 0.9846 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4667 0.2556 0.1333
## Detection Prevalence 0.4778 0.2778 0.1333
## Balanced Accuracy 0.9664 0.9640 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
Note: This runs fine in R but I can’t get it to print to HTML properly, so I’ll have to do a workaround: run it once, save the results, and load them below.
# library(caret)
# trControl <- caret::trainControl(method = "repeatedcv", number = 4,
#     index = caret::createFolds(iBenefits$Diet, k = 10), repeats = 4, p = 0.75,
#     allowParallel = T, verboseIter = T)
# library(doParallel)
# # make a PSOCK cluster, leaving two cores free
# cl <- makeCluster(detectCores() - 2, type = "PSOCK")
# # register the number of parallel workers
# registerDoParallel(cl)
# # return number of parallel workers
# getDoParWorkers()
# best.mlm.ben <- caret::train(Diet ~ . - Interest_Veganism, data = iBenefits,
#     method = "multinom", trControl = trControl)
# stopCluster(cl)
# # ----------------------- Tue Mar 06 11:50 2018 ------------------------#
# # Test Accuracy
# best.mlm.ben.acc <- caret::confusionMatrix(predict(best.mlm.ben), iBenefits$Diet)
# # 100% accurate. Model is likely overfitted to training data, but this will
# # lead to interesting insight about the factors that predict preference.
# # ----------------------- Tue Mar 06 08:45:06 2018 ------------------------#
# # Display coefficients on both models
# best.mlm.ben.coef <- coef(best.mlm.ben$finalModel) %>% as.matrix()
# best.mlm.ben.coef <- as.data.frame(best.mlm.ben.coef, stringsAsFactors = F) %>%
#     rownames_to_column("Feat") %>% gather(key = "rowname", value = "Coef", -Feat) %>%
#     mutate(pVal = as.vector(loadings2pval(Coef))) %>% arrange(pVal) %>%
#     inner_join(qKey, by = c(rowname = "Short"))
# save(best.mlm.ben.acc, best.mlm.ben.coef, best.mlm.ben.step, file = "MLRbenefits.Rdata")
load(file = "MLRbenefits.Rdata")
# ----------------------- Tue Mar 06 12:20:43 2018 ------------------------#
# Accuracy of Model
best.mlm.ben.acc
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 44 0 0 0 0
## Flexitarian 0 24 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 16
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9658, 1)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.4151 0.2264 0.1132
## Detection Rate 0.4151 0.2264 0.1132
## Detection Prevalence 0.4151 0.2264 0.1132
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09434 0.1509
## Detection Rate 0.09434 0.1509
## Detection Prevalence 0.09434 0.1509
## Balanced Accuracy 1.00000 1.0000
# Coefficients and their statistical significance
DT::datatable(best.mlm.ben.coef) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
# ----------------------- Tue Mar 06 14:33:34 2018 ------------------------#
# Stepwise feature selection
mlm.ben <- nnet::multinom(Diet ~ ., data = iBenefits)
## # weights: 190 (148 variable)
## initial value 168.990981
## iter 10 value 52.848570
## iter 20 value 30.160739
## iter 30 value 15.988447
## iter 40 value 8.408688
## iter 50 value 0.039870
## final value 0.000062
## converged
# summary(step.mlm.ben <- step(mlm.ben, direction = "both", trace = 0))
# This errors, so I will just load the saved result instead.
best.mlm.ben.step <- nnet::multinom(formula = Diet ~ Interest_Veganism + Question30 +
Question31 + Question32 + Question33 + Question35 + Question36 + Question37 +
Question41 + Question42 + Question44 + Question45 + Question46 + Question47 +
Question51 + Question52 + Question53 + Question54 + Question55 + Question56 +
Environmentalist + Feminist + Animal_Lover + Group + Age_Cohort, data = iBenefits)
## # weights: 135 (104 variable)
## initial value 170.600419
## iter 10 value 67.165375
## iter 20 value 47.376452
## iter 30 value 32.603881
## iter 40 value 23.414883
## iter 50 value 21.444379
## iter 60 value 20.148754
## iter 70 value 19.144439
## iter 80 value 18.952579
## iter 90 value 18.940168
## iter 100 value 18.939265
## final value 18.939265
## stopped after 100 iterations
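# As above, the confusion matrix below lacks its generating call; presumably
# (assumed):
# caret::confusionMatrix(predict(best.mlm.ben.step, iBenefits), iBenefits$Diet)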
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 40 4 0 0 0
## Flexitarian 4 20 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 16
##
## Overall Statistics
##
## Accuracy : 0.9245
## 95% CI : (0.8567, 0.9669)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8969
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.8333 1.0000
## Specificity 0.9355 0.9512 1.0000
## Pos Pred Value 0.9091 0.8333 1.0000
## Neg Pred Value 0.9355 0.9512 1.0000
## Prevalence 0.4151 0.2264 0.1132
## Detection Rate 0.3774 0.1887 0.1132
## Detection Prevalence 0.4151 0.2264 0.1132
## Balanced Accuracy 0.9223 0.8923 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09434 0.1509
## Detection Rate 0.09434 0.1509
## Detection Prevalence 0.09434 0.1509
## Balanced Accuracy 1.00000 1.0000
ben.step.vars <- c("Interest_Veganism", "Question30", "Question31", "Question32",
"Question33", "Question35", "Question36", "Question37", "Question41", "Question42",
"Question44", "Question45", "Question46", "Question47", "Question51", "Question52",
"Question53", "Question54", "Question55", "Question56", "Environmentalist", "Feminist",
"Animal_Lover", "Group", "Age_Cohort")
# ----------------------- Tue Mar 06 14:54:58 2018 ------------------------#
# These questions should have the best predictive value for Diet, grouped by
# whether they act as a benefit or a barrier
(BarQ <- union(bar.pval.vars, bar.step.vars))
## [1] "Question11" "Environmentalist" "Age_Cohort"
## [4] "Question13" "Question22" "Question7"
## [7] "Question17" "Question25" "Question12"
## [10] "Question24" "SJA" "Question5"
## [13] "Question8" "Question9" "Question16"
## [16] "Question18" "Question21" "Question28"
## [19] "Question29" "Animal_Lover" "Group"
## [22] "Race" "Gender"
## [1] "Question30" "Question38" "Question55"
## [4] "Question52" "Question31" "Question45"
## [7] "Question50" "Race" "Question32"
## [10] "Question43" "Interest_Veganism" "Question33"
## [13] "Question35" "Question36" "Question37"
## [16] "Question41" "Question42" "Question44"
## [19] "Question46" "Question47" "Question51"
## [22] "Question53" "Question54" "Question56"
## [25] "Environmentalist" "Feminist" "Animal_Lover"
## [28] "Group" "Age_Cohort"
# ----------------------- Wed Mar 07 18:03:15 2018 ------------------------#
# Barriers
iBarC <- iBarriers %>% select(one_of(BarC.key$Short))
iBarH <- iBarriers %>% select(one_of(BarH.key$Short))
iBarK <- iBarriers %>% select(one_of(BarK.key$Short))
iBarN <- iBarriers %>% select(one_of(BarN.key$Short))
iBarP <- iBarriers %>% select(one_of(BarP.key$Short))
Bar.scales <- list(Convenience = iBarC, Health = iBarH, Knowledge = iBarK, Normative = iBarN,
Preference = iBarP)
Bar.groups <- list(1:4, 5:8, 9:11, 12:15, 16:20)
names(Bar.groups) <- names(Bar.scales)
Bar.all <- do.call(cbind, Bar.scales)
psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Bar.groups, item.labels = colnames(Bar.all)),
items = Bar.all)
## Call: psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Bar.groups,
## item.labels = colnames(Bar.all)), items = Bar.all)
##
## (Unstandardized) Alpha:
## Convenience Health Knowledge Normative Preference
## alpha 0.71 0.84 0.76 0.63 0.72
##
## Standard errors of unstandardized Alpha:
## Convenience Health Knowledge Normative Preference
## ASE 0.089 0.07 0.1 0.097 0.076
##
## Average item correlation:
## Convenience Health Knowledge Normative Preference
## average.r 0.38 0.56 0.51 0.3 0.35
##
## Guttman 6* reliability:
## Convenience Health Knowledge Normative Preference
## Lambda.6 0.74 0.86 0.75 0.73 0.81
##
## Signal/Noise based upon av.r :
## Convenience Health Knowledge Normative Preference
## Signal/Noise 2.4 5.1 3.1 1.7 2.6
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## Convenience Health Knowledge Normative Preference
## Convenience 0.71 0.29 0.61 0.27 0.39
## Health 0.22 0.84 0.36 0.31 0.56
## Knowledge 0.44 0.29 0.76 0.20 0.23
## Normative 0.18 0.23 0.14 0.63 0.49
## Preference 0.28 0.43 0.17 0.33 0.72
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
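# A minimal sketch (hypothetical name): keeping the scoreItems result rather
# than only printing it exposes the per-respondent scale scores and the
# reliabilities for reuse downstream.
Bar.scored <- psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Bar.groups,
    item.labels = colnames(Bar.all)), items = Bar.all)
Bar.scored$alpha  # unstandardized alpha per scale, as printed above
head(Bar.scored$scores)  # each respondent's mean response on each barrier scale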
# ----------------------- Wed Mar 07 18:08:58 2018 ------------------------#
# Benefits
iBenH <- iBenefits %>% select(one_of(BenH.key$Short))
iBenME <- iBenefits %>% select(one_of(BenME.key$Short))
Ben.scales <- list(Health = iBenH, MoralEnv = iBenME)
lapply(Ben.scales, length)
## $Health
## [1] 11
##
## $MoralEnv
## [1] 9
Ben.groups <- list(Health = 1:11, MoralEnv = 12:20)
Ben.all <- do.call(cbind, Ben.scales)
psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Ben.groups, item.labels = colnames(Ben.all)),
items = Ben.all)
## Call: psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Ben.groups,
## item.labels = colnames(Ben.all)), items = Ben.all)
##
## (Unstandardized) Alpha:
## Health MoralEnv
## alpha 0.95 0.95
##
## Standard errors of unstandardized Alpha:
## Health MoralEnv
## ASE 0.021 0.024
##
## Average item correlation:
## Health MoralEnv
## average.r 0.61 0.67
##
## Guttman 6* reliability:
## Health MoralEnv
## Lambda.6 0.96 0.97
##
## Signal/Noise based upon av.r :
## Health MoralEnv
## Signal/Noise 17 18
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## Health MoralEnv
## Health 0.95 0.60
## MoralEnv 0.56 0.95
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
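# Sketch (hypothetical names): bind the two benefit scale scores onto
# iBenefits so they can stand in for the 20 raw items as compact predictors.
Ben.scored <- psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Ben.groups,
    item.labels = colnames(Ben.all)), items = Ben.all)
iBenefits.scored <- cbind(iBenefits, Ben.scored$scores)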
# ----------------------- Wed Mar 07 20:51:29 2018 ------------------------#
# Compare the scale questions with those with predictive power from the MLR
scale.qs <- unlist(c(lapply(Ben.scales, names), lapply(Bar.scales, names)))
all(scale.qs %in% OptQ[["All"]]$Short)
## [1] FALSE
# View the questions that are not in the feature selection set
scale.qs[!scale.qs %in% OptQ[["All"]]$Short]
## Health5 Health10 MoralEnv5 Convenience2 Convenience4
## "Question34" "Question40" "Question49" "Question19" "Question20"
## Health1 Health2 Knowledge3 Normative3 Normative4
## "Question26" "Question27" "Question23" "Question4" "Question6"
## Preference2 Preference4
## "Question15" "Question14"
## Diet~+Question17+Question19+Question18+Question20+Question26+Question27+Question28+Question29+Question24+Question25+Question23+Question5+Question9+Question4+Question6+Question11+Question15+Question13+Question14+Question12
Evan.bar.mlr <- nnet::multinom(Diet ~ Interest_Veganism + Question17 + Question19 +
Question18 + Question20 + Question26 + Question27 + Question28 + Question29 +
Question24 + Question25 + Question23 + Question5 + Question9 + Question4 + Question6 +
Question11 + Question15 + Question13 + Question14 + Question12, data = iBarriers)
## # weights: 92 (66 variable)
## initial value 124.766493
## iter 10 value 63.311608
## iter 20 value 40.779788
## iter 30 value 26.626715
## iter 40 value 19.804762
## iter 50 value 18.446852
## iter 60 value 17.959982
## iter 70 value 17.679003
## iter 80 value 17.621367
## iter 90 value 17.613792
## iter 100 value 17.613381
## final value 17.613381
## stopped after 100 iterations
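# The log above stops at nnet's default of 100 iterations without reporting
# convergence. A sketch (hypothetical name): update() refits with a higher
# iteration cap so the likelihood actually converges before we predict.
Evan.bar.mlr.conv <- update(Evan.bar.mlr, maxit = 500, trace = FALSE)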
Evan.bar.mlr.pred <- predict(Evan.bar.mlr, iBarriers)
caret::confusionMatrix(Evan.bar.mlr.pred, iBarriers$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 41 4 0 0 0
## Flexitarian 3 20 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9222
## 95% CI : (0.8463, 0.9682)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8817
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9318 0.8333 1.0000
## Specificity 0.9130 0.9545 1.0000
## Pos Pred Value 0.9111 0.8696 1.0000
## Neg Pred Value 0.9333 0.9403 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4556 0.2222 0.1333
## Detection Prevalence 0.5000 0.2556 0.1333
## Balanced Accuracy 0.9224 0.8939 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.0000 NA
## Specificity 1.0000 1
## Pos Pred Value 1.0000 NA
## Neg Pred Value 1.0000 NA
## Prevalence 0.1111 0
## Detection Rate 0.1111 0
## Detection Prevalence 0.1111 0
## Balanced Accuracy 1.0000 NA
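# Note that this accuracy is computed on the training data itself. A hedged
# sketch of a cross-validated estimate via caret (illustrative, not from the
# original run); method = "multinom" also tunes the weight-decay penalty.
set.seed(1)
bar.cv <- caret::train(formula(terms(Evan.bar.mlr)), data = iBarriers,
    method = "multinom", trControl = caret::trainControl(method = "cv", number = 5),
    trace = FALSE)
bar.cv$results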
Evan.bar.means <- iBarriers.v %>% select(one_of(c("Diet", "Interest_Veganism", (lapply(Bar.scales,
names) %>% unlist)))) %>% group_by(Diet) %>% summarise_all(funs(mean))
Evan.bar.means.mlr <- nnet::multinom(Diet ~ ., data = Evan.bar.means)
## # weights: 92 (66 variable)
## initial value 5.545177
## iter 10 value 0.068637
## final value 0.000070
## converged
Evan.bar.means.mlr.pred <- predict(Evan.bar.means.mlr, iBarriers.v)
caret::confusionMatrix(Evan.bar.means.mlr.pred, iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 19 4 0 0
## Flexitarian 16 14 3 4
## Pescetarian 4 5 6 1
## Vegetarian 5 1 3 5
##
## Overall Statistics
##
## Accuracy : 0.4889
## 95% CI : (0.382, 0.5965)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : 0.541638
##
## Kappa : 0.2945
## Mcnemar's Test P-Value : 0.003397
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.4318 0.5833 0.50000
## Specificity 0.9130 0.6515 0.87179
## Pos Pred Value 0.8261 0.3784 0.37500
## Neg Pred Value 0.6269 0.8113 0.91892
## Prevalence 0.4889 0.2667 0.13333
## Detection Rate 0.2111 0.1556 0.06667
## Detection Prevalence 0.2556 0.4111 0.17778
## Balanced Accuracy 0.6724 0.6174 0.68590
## Class: Vegetarian
## Sensitivity 0.50000
## Specificity 0.88750
## Pos Pred Value 0.35714
## Neg Pred Value 0.93421
## Prevalence 0.11111
## Detection Rate 0.05556
## Detection Prevalence 0.15556
## Balanced Accuracy 0.69375
Evan.bar.means <- iBarriers.v %>% select(one_of(c("Diet", "Interest_Veganism", names(iBarriers.v)[29:36],
(lapply(Bar.scales, names) %>% unlist)))) %>% group_by(Diet) %>% summarise_all(funs(mean))
Evan.bar.means.mlr <- nnet::multinom(Diet ~ ., data = Evan.bar.means)
## # weights: 124 (90 variable)
## initial value 5.545177
## iter 10 value 0.018203
## iter 20 value 0.000103
## iter 20 value 0.000061
## iter 20 value 0.000059
## final value 0.000059
## converged
Evan.bar.means.mlr.pred <- predict(Evan.bar.means.mlr, iBarriers.v)
caret::confusionMatrix(Evan.bar.means.mlr.pred, iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 26 3 1 1
## Flexitarian 15 18 2 4
## Pescetarian 1 1 8 0
## Vegetarian 2 2 1 5
##
## Overall Statistics
##
## Accuracy : 0.6333
## 95% CI : (0.5251, 0.7325)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : 0.004073
##
## Kappa : 0.4677
## Mcnemar's Test P-Value : 0.111298
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.5909 0.7500 0.66667
## Specificity 0.8913 0.6818 0.97436
## Pos Pred Value 0.8387 0.4615 0.80000
## Neg Pred Value 0.6949 0.8824 0.95000
## Prevalence 0.4889 0.2667 0.13333
## Detection Rate 0.2889 0.2000 0.08889
## Detection Prevalence 0.3444 0.4333 0.11111
## Balanced Accuracy 0.7411 0.7159 0.82051
## Class: Vegetarian
## Sensitivity 0.50000
## Specificity 0.93750
## Pos Pred Value 0.50000
## Neg Pred Value 0.93750
## Prevalence 0.11111
## Detection Rate 0.05556
## Detection Prevalence 0.11111
## Balanced Accuracy 0.71875
Evan.bar.items <- iBarriers.v %>% select(one_of(c("Diet", "Interest_Veganism", names(iBarriers.v)[29:36],
(lapply(Bar.scales, names) %>% unlist))))
Evan.bar.items.mlr <- nnet::multinom(Diet ~ ., data = Evan.bar.items)
## # weights: 124 (90 variable)
## initial value 124.766493
## iter 10 value 51.953585
## iter 20 value 22.062162
## iter 30 value 6.221790
## iter 40 value 0.010267
## final value 0.000052
## converged
Evan.bar.items.mlr.pred <- predict(Evan.bar.items.mlr, iBarriers.v)
caret::confusionMatrix(Evan.bar.items.mlr.pred, iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 44 0 0 0
## Flexitarian 0 24 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9598, 1)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4889 0.2667 0.1333
## Detection Prevalence 0.4889 0.2667 0.1333
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
# With 90 free parameters fit to just 90 observations, perfect training
# accuracy is unsurprising. Can variables be eliminated while retaining the
# accuracy?
# Evan.bar.items.step <- step(Evan.bar.items.mlr, direction = 'both', trace = F)
# Evan.bar.items.step %>% summary
Evan.bar.items.step.mlr <- nnet::multinom(formula = Diet ~ Environmentalist + Feminist +
SJA + Group + Age_Cohort + Question17 + Question20 + Question28 + Question25 +
Question9 + Question4 + Question11 + Question13 + Question14, data = Evan.bar.items)
## # weights: 64 (45 variable)
## initial value 124.766493
## iter 10 value 56.269174
## iter 20 value 39.080114
## iter 30 value 32.373620
## iter 40 value 22.980824
## iter 50 value 19.151602
## iter 60 value 12.809248
## iter 70 value 12.326466
## iter 80 value 12.089918
## iter 90 value 12.066900
## iter 100 value 12.066540
## final value 12.066540
## stopped after 100 iterations
Evan.bar.items.step.mlr.pred <- predict(Evan.bar.items.step.mlr, Evan.bar.items)
caret::confusionMatrix(Evan.bar.items.step.mlr.pred, Evan.bar.items$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 40 5 0 0
## Flexitarian 4 19 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 0.9
## 95% CI : (0.8186, 0.9532)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8479
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.7917 1.0000
## Specificity 0.8913 0.9394 1.0000
## Pos Pred Value 0.8889 0.8261 1.0000
## Neg Pred Value 0.9111 0.9254 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4444 0.2111 0.1333
## Detection Prevalence 0.5000 0.2556 0.1333
## Balanced Accuracy 0.9002 0.8655 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
# The stepwise model retains 90% accuracy with only 9 questions and a handful
# of demographic variables
## Diet~+Question30+Question31+Question32+Question33+Question34+Question35+Question36+Question37+Question38+Question40+Question41+Question43+Question44+Question45+Question46+Question49+Question50+Question51+Question52+Question53
Evan.ben.mlr <- nnet::multinom(Diet ~ Question30 + Question31 + Question32 + Question33 +
Question34 + Question35 + Question36 + Question37 + Question38 + Question40 +
Question41 + Question43 + Question44 + Question45 + Question46 + Question49 +
Question50 + Question51 + Question52 + Question53, data = iBenefits)
## # weights: 110 (84 variable)
## initial value 170.600419
## iter 10 value 106.011315
## iter 20 value 82.260018
## iter 30 value 72.283157
## iter 40 value 68.723215
## iter 50 value 66.125303
## iter 60 value 61.641560
## iter 70 value 54.983769
## iter 80 value 52.116577
## iter 90 value 50.270400
## iter 100 value 49.801088
## final value 49.801088
## stopped after 100 iterations
Evan.ben.mlr.pred <- predict(Evan.ben.mlr, iBenefits)
caret::confusionMatrix(Evan.ben.mlr.pred, iBenefits$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 38 6 1 0 0
## Flexitarian 3 15 4 0 1
## Pescetarian 1 3 6 0 0
## Vegetarian 0 0 0 10 0
## Vegan 2 0 1 0 15
##
## Overall Statistics
##
## Accuracy : 0.7925
## 95% CI : (0.7028, 0.8651)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : 0.000000000000002373
##
## Kappa : 0.7155
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.8636 0.6250 0.50000
## Specificity 0.8871 0.9024 0.95745
## Pos Pred Value 0.8444 0.6522 0.60000
## Neg Pred Value 0.9016 0.8916 0.93750
## Prevalence 0.4151 0.2264 0.11321
## Detection Rate 0.3585 0.1415 0.05660
## Detection Prevalence 0.4245 0.2170 0.09434
## Balanced Accuracy 0.8754 0.7637 0.72872
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 0.9375
## Specificity 1.00000 0.9667
## Pos Pred Value 1.00000 0.8333
## Neg Pred Value 1.00000 0.9886
## Prevalence 0.09434 0.1509
## Detection Rate 0.09434 0.1415
## Detection Prevalence 0.09434 0.1698
## Balanced Accuracy 1.00000 0.9521
Evan.ben.means <- vector("list", length(Ben.scales))
for (i in seq_along(Ben.scales)) {
Evan.ben.means[[i]] <- iBenefits %>% select(one_of(c("Diet", "Interest_Veganism",
names(Ben.scales[[i]])))) %>% group_by(Diet) %>% summarise_all(funs(mean))
}
Evan.ben.means <- do.call(left_join, args = list(x = Evan.ben.means[[1]], y = Evan.ben.means[[2]],
by = c("Diet", "Interest_Veganism")))
Evan.ben.means.mlr <- nnet::multinom(Diet ~ ., data = Evan.ben.means)
## # weights: 115 (88 variable)
## initial value 8.047190
## iter 10 value 0.040793
## iter 20 value 0.000274
## final value 0.000073
## converged
Evan.ben.means.mlr.pred <- predict(Evan.ben.means.mlr, iBenefits)
caret::confusionMatrix(Evan.ben.means.mlr.pred, iBenefits$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 13 2 1 1 0
## Flexitarian 15 16 3 4 0
## Pescetarian 8 4 7 0 0
## Vegetarian 7 1 1 3 3
## Vegan 1 1 0 2 13
##
## Overall Statistics
##
## Accuracy : 0.4906
## 95% CI : (0.3922, 0.5895)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : 0.07034
##
## Kappa : 0.3587
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.2955 0.6667 0.58333
## Specificity 0.9355 0.7317 0.87234
## Pos Pred Value 0.7647 0.4211 0.36842
## Neg Pred Value 0.6517 0.8824 0.94253
## Prevalence 0.4151 0.2264 0.11321
## Detection Rate 0.1226 0.1509 0.06604
## Detection Prevalence 0.1604 0.3585 0.17925
## Balanced Accuracy 0.6155 0.6992 0.72784
## Class: Vegetarian Class: Vegan
## Sensitivity 0.30000 0.8125
## Specificity 0.87500 0.9556
## Pos Pred Value 0.20000 0.7647
## Neg Pred Value 0.92308 0.9663
## Prevalence 0.09434 0.1509
## Detection Rate 0.02830 0.1226
## Detection Prevalence 0.14151 0.1604
## Balanced Accuracy 0.58750 0.8840
Evan.ben.items <- iBenefits %>% select(one_of(c("Diet", "Interest_Veganism", names(iBarriers)[29:36],
(lapply(Ben.scales, names) %>% unlist))))
Evan.ben.items.mlr <- nnet::multinom(Diet ~ ., data = Evan.ben.items)
## # weights: 155 (120 variable)
## initial value 168.990981
## iter 10 value 66.945646
## iter 20 value 44.317171
## iter 30 value 29.950262
## iter 40 value 21.708441
## iter 50 value 19.349112
## iter 60 value 18.350967
## iter 70 value 18.109316
## iter 80 value 17.873811
## iter 90 value 17.709807
## iter 100 value 17.680862
## final value 17.680862
## stopped after 100 iterations
Evan.ben.items.mlr.pred <- predict(Evan.ben.items.mlr, iBenefits)
caret::confusionMatrix(Evan.ben.items.mlr.pred, iBenefits$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 40 3 0 0 0
## Flexitarian 4 21 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 15
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8675, 0.9728)
## No Information Rate : 0.419
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.9089
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.8750 1.0000
## Specificity 0.9508 0.9506 1.0000
## Pos Pred Value 0.9302 0.8400 1.0000
## Neg Pred Value 0.9355 0.9625 1.0000
## Prevalence 0.4190 0.2286 0.1143
## Detection Rate 0.3810 0.2000 0.1143
## Detection Prevalence 0.4095 0.2381 0.1143
## Balanced Accuracy 0.9300 0.9128 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09524 0.1429
## Detection Rate 0.09524 0.1429
## Detection Prevalence 0.09524 0.1429
## Balanced Accuracy 1.00000 1.0000
# Evan.ben.items.step <- step(Evan.ben.items.mlr, direction = 'both', trace = F)
# Evan.ben.items.step %>% summary
Evan.ben.items.step.mlr <- nnet::multinom(formula = Diet ~ Interest_Veganism + Feminist +
SJA + Animal_Lover + Gender + Age_Cohort + Question30 + Question31 + Question32 +
Question35 + Question37 + Question38 + Question45 + Question50 + Question52 +
Question53, data = Evan.ben.items)
## # weights: 90 (68 variable)
## initial value 168.990981
## iter 10 value 84.754305
## iter 20 value 63.044781
## iter 30 value 49.854825
## iter 40 value 39.872971
## iter 50 value 33.059102
## iter 60 value 28.881446
## iter 70 value 26.457202
## iter 80 value 23.259939
## iter 90 value 23.238566
## iter 100 value 23.238436
## final value 23.238436
## stopped after 100 iterations
Evan.ben.items.step.mlr.pred <- predict(Evan.ben.items.step.mlr, Evan.ben.items)
caret::confusionMatrix(Evan.ben.items.step.mlr.pred, Evan.ben.items$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 40 5 0 0 0
## Flexitarian 4 19 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 15
##
## Overall Statistics
##
## Accuracy : 0.9143
## 95% CI : (0.8435, 0.9601)
## No Information Rate : 0.419
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8822
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.7917 1.0000
## Specificity 0.9180 0.9506 1.0000
## Pos Pred Value 0.8889 0.8261 1.0000
## Neg Pred Value 0.9333 0.9390 1.0000
## Prevalence 0.4190 0.2286 0.1143
## Detection Rate 0.3810 0.1810 0.1143
## Detection Prevalence 0.4286 0.2190 0.1143
## Balanced Accuracy 0.9136 0.8711 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09524 0.1429
## Detection Rate 0.09524 0.1429
## Detection Prevalence 0.09524 0.1429
## Balanced Accuracy 1.00000 1.0000
# 91% accuracy with 10 questions and a handful of demographic variables. Not bad!
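# Caveat: every accuracy reported in this section is a resubstitution
# estimate. A hedged sketch of a simple hold-out check (illustrative names):
set.seed(1)
idx <- caret::createDataPartition(Evan.ben.items$Diet, p = 0.7, list = FALSE)
ben.holdout.mlr <- nnet::multinom(Diet ~ ., data = Evan.ben.items[idx, ], trace = FALSE)
caret::confusionMatrix(predict(ben.holdout.mlr, Evan.ben.items[-idx, ]),
    Evan.ben.items[-idx, ]$Diet)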