# Packages used throughout this script
library(foreign)   # read.spss()
library(stringr)   # str_detect()
library(purrr)     # map_int()
library(dplyr)     # select(), filter(), joins, %>%
library(tidyr)     # gather()
library(tibble)    # rownames_to_column()
library(readr)     # write_csv()
library(DT)        # datatable(), formatStyle(), styleInterval()
df <- read.spss("C:\\Users\\Stephen\\Documents\\Veginvesting\\Evan Project\\Parker_Research_Data.sav",
    to.data.frame = TRUE)
df2 <- readxl::read_xlsx("C:\\Users\\Stephen\\Documents\\Veginvesting\\Evan Project\\Food Choice Sustainability Human Health and Morality_All Responses.xlsx")
qKey <- names(df2)
# Replace the anchored Likert labels with their numeric endpoints
df <- as.data.frame(apply(df, 2, function(x) {
    x <- gsub("\\s?Strongly\\sAgree\\s?", 5, x)
    gsub("\\s?Strongly\\sDisagree\\s?", 1, x)
}))
# Convert the question columns to numeric
df[, 5:61] <- apply(df[, 5:61], 2, as.numeric)
# Code NAs toward the central tendency (the neutral answer)
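# A minimal sketch of that recoding (assumed; the replacement step itself is
# not shown in this script) would set missing Likert responses to the neutral
# midpoint of 3:
# df[, 5:61] <- lapply(df[, 5:61], function(x) replace(x, is.na(x), 3))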
row.names(df) <- df$ID
df <- df[, -1]
# ------------------- Thu Feb 01 15:27:00 2018 --------------------#
# Code interest variable as numeric
codeinterest <- function(x) {
    # Map the interest-in-veganism response to its ordinal position (1-5)
    lgl <- vector("logical", length = 5)
    lgl[1] <- str_detect(x, "I have no")
    lgl[2] <- str_detect(x, "I have some")
    lgl[3] <- str_detect(x, "I would like")
    lgl[4] <- str_detect(x, "I am currently")
    lgl[5] <- str_detect(x, "I am already")
    out <- which(lgl)
    return(out)
}
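# e.g. (illustrative input): codeinterest("I have some interest in veganism")
# returns 2L, the position of the matched statement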
df[, 3] <- map_int(df[, 3], codeinterest)
# omit consent
df <- df[, -1]
# dummy code group
codegroup <- function(x) {
lgl <- vector("logical", length = 5)
lgl[1] <- str_detect(x, "Mountain")
lgl[3] <- str_detect(x, "Other")
lgl[5] <- str_detect(x, "Brother")
out <- which(lgl)
return(out)
}
df[["Group"]] <- map_int(df[["Group"]], codegroup)
# Dummy code Race
coderace <- function(x) {
lgl <- vector("logical", length = 5)
lgl[1] <- str_detect(x, "White")
lgl[2] <- str_detect(x, "Other")
lgl[3] <- str_detect(x, "Latino")
lgl[4] <- str_detect(x, "Asian")
lgl[5] <- str_detect(x, "Indian")
out <- which(lgl)
return(out)
}
df[["Race"]] <- map_int(df[["Race"]], coderace)
# Dummy Code Gender
codegen <- function(x) {
lgl <- vector("logical", length = 5)
lgl[1] <- str_detect(x, "Female")
lgl[3] <- is.na(x)
lgl[5] <- str_detect(x, "Male")
out <- which(lgl)
return(out)
}
df[["Gender"]] <- map_int(df[["Gender"]], codegen)
# Dummy code age
codeage <- function(x) {
    # Map the age bracket to its ordinal position (1-6)
    lgl <- vector("logical", length = 6)
    lgl[1] <- str_detect(x, "18-24")
    lgl[2] <- str_detect(x, "25-34")
    lgl[3] <- str_detect(x, "35-44")
    lgl[4] <- str_detect(x, "45-54")
    lgl[5] <- str_detect(x, "55-64")
    lgl[6] <- str_detect(x, fixed("65+"))  # fixed(): "+" is a regex metacharacter
    out <- which(lgl)
    return(out)
}
df[["Age_Cohort"]] <- map_int(df[["Age_Cohort"]], codeage)
library(scales)
# Linearly rescale the six age codes onto a 1-5 range
df[["Age_Cohort"]] <- rescale(df[["Age_Cohort"]], to = c(1, 5))
df <- as.data.frame(df)
qKey <- data.frame(Short = names(df), Long = names(df2[-c(1:10)])[1:63], stringsAsFactors = F)
write_csv(df, "EPData.csv")
df <- read.csv("EPData.csv", stringsAsFactors = T)
BarH <- c("There are not enough vitamins and minerals in vegan diets", "I wouldn’t (or don’t) get enough energy or strength from vegan food",
"There is not enough protein in vegan diets", "I would be (or am) worried about my health")
BarP <- c("I like eating meat", "I think humans are meant to eat animal products (meat, fish, dairy, or eggs)",
"Vegan diets are boring", "Vegan diets are not filling enough", "I like dairy products and/or eggs")
BarC <- c("It is inconvenient", "It takes too long to prepare vegan food", "Vegan options are not available where I shop or at my home",
"I don’t have enough willpower")
BarK <- c("I don’t know what to eat instead of animal products", "I lack the right cooking skills",
"I need more information about vegan diets")
BarN <- c("My friends eat animal products", "I don’t want people to stereotype me negatively (e.g. that I must be strange)",
"My family eats animal products", "My family/spouse/partner won’t eat vegan food")
# Match each scale item to its row in qKey by the first 20 characters of the
# question text
BarC.key <- qKey[c(apply(sapply(substr(BarC, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
# BarH items contain regex metacharacters (parentheses), so index directly
BarH.key <- qKey[c(25:28), ]
BarK.key <- qKey[c(apply(sapply(substr(BarK, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
BarN.key <- qKey[c(apply(sapply(substr(BarN, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
BarP.key <- qKey[c(apply(sapply(substr(BarP, 1, 20), FUN = str_detect, string = qKey$Long,
    simplify = T), 2, FUN = which)), ]
BenME <- c("Reduce greenhouse gas emissions and contributions to climate change",
"Help eliminate animal exploitation", "Help animal rights", "Reduce deforestation and land-use change",
"Help animal welfare", "Reduce water pollution", "Decrease world hunger", "Increase the efficiency of food production",
"Create a more peaceful world")
BenME.key <- qKey[qKey$Long %in% BenME, ]
BenH <- c("Be fit", "Have a better quality of life", "Have plenty of energy", "Increase my control over my own health",
"Live longer", "Stay healthy", "Prevent disease in general (e.g. heart disease, cancer)",
"Eat more fruit and vegetables", "Be healthier by decreasing my intake of chemicals, steroids and antibiotics which are found in animal products",
"Lower my chances of getting food poisoning", "Decrease saturated fat intake in my diet")
BenH.key <- qKey[qKey$Long %in% BenH, ]
# ----------------------- Sun Mar 04 17:27:45 2018 ------------------------#
# Amelia assumes the variables are jointly (multivariate) normal and imputes
# missing values in a way that preserves the covariance structure. See
# http://gking.harvard.edu/amelia for details.
library(Amelia)
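# Before imputing, it can help to visualize the missingness pattern; a quick
# sketch (not part of the original run):
# Amelia::missmap(Barriers)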
levels(Barriers$Diet)
## [1] "Flexitarian" "Omnivore" "Pescetarian" "Vegan" "Vegetarian"
is.ordered(Barriers$Diet)
## [1] FALSE
# ----------------------- Sun Mar 04 17:03:34 2018 ------------------------#
# Make Diet an ordinal variable
Barriers$Diet <- factor(Barriers$Diet, levels = c("Omnivore", "Flexitarian", "Pescetarian",
"Vegetarian", "Vegan"), ordered = T)
Benefits$Diet <- factor(Benefits$Diet, levels = c("Omnivore", "Flexitarian", "Pescetarian",
"Vegetarian", "Vegan"), ordered = T)
nmBarriers <- names(Barriers)
(Categorical <- nmBarriers[c(1, 2, 29:36)])
## [1] "Diet" "Interest_Veganism" "Environmentalist"
## [4] "Feminist" "SJA" "Animal_Lover"
## [7] "Group" "Race" "Gender"
## [10] "Age_Cohort"
numBarriers <- Barriers %>% select(-one_of(Categorical))
Barriers.imputed <- Amelia::amelia(Barriers, m = 1, p2s = 2, idvars = Categorical)
##
## amelia starting
## beginning prep functions
## Variables used: Question4 Question5 Question6 Question7 Question8 Question9 Question10 Question11 Question12 Question13 Question14 Question15 Question16 Question17 Question18 Question19 Question20 Question21 Question22 Question23 Question24 Question25 Question26 Question27 Question28 Question29
## running bootstrap
## -- Imputation 1 --
## setting up EM chain indicies
##
## 1(377) 2(208) 3(192) 4(124) 5(98) 6(97) 7(93) 8(83) 9(71) 10(57) 11(46) 12(32) 13(15) 14(7) 15(1) 16(0)
##
## saving and cleaning
## [1] 22
# Save the matrix with complete data
iBarriers <- Barriers.imputed[["imputations"]][["imp1"]]
# ----------------------- Sun Mar 04 17:25:26 2018 ------------------------#
# Impute values in Benefits dataset
Benefits.imputed <- Amelia::amelia(Benefits, m = 1, p2s = 2, idvars = Categorical)
##
## amelia starting
## beginning prep functions
## Variables used: Question30 Question31 Question32 Question33 Question34 Question35 Question36 Question37 Question38 Question39 Question40 Question41 Question42 Question43 Question44 Question45 Question46 Question47 Question48 Question49 Question50 Question51 Question52 Question53 Question54 Question55 Question56
## running bootstrap
## -- Imputation 1 --
## setting up EM chain indicies
##
## 1(405) 2(311) 3(67) 4(55) 5(40) 6(11) 7(0)
##
## saving and cleaning
## [1] 31
The original factor analysis was run blind to any explanation of the data:
colnames(dfNum) <- c("Id", qKey$Short[13:69])
psych::fa.parallel(dfNum[, -c(1)], fa = "pc", main = "# of Factors", n.iter = 10,
use = "pairwise", quant = 0.95)
fadfNum <- psych::fa(dfNum[, -c(1)], nfactors = 4, n.iter = 10, residuals = T, alpha = 0.05,
fm = "ml")
# Top and bottom five loadings for each factor
topBottom <- function(loadings, i, n = 5) {
    o <- loadings[, i][order(loadings[, i], decreasing = T)]
    data.frame(Lvl = c(rep("H", n), rep("T", n)),
        Q = c(names(head(o, n)), names(tail(o, n))), stringsAsFactors = F)
}
dsNumF1 <- topBottom(fadfNum$loadings, 1)
dsNumF2 <- topBottom(fadfNum$loadings, 2)
dsNumF3 <- topBottom(fadfNum$loadings, 3)
dsNumF4 <- topBottom(fadfNum$loadings, 4)
DT::datatable(dsNumF1)
DT::datatable(dsNumF2)
DT::datatable(dsNumF3)
DT::datatable(dsNumF4)
## Parallel analysis suggests that the number of factors = 5 and the number of components = 5
# It looks like the parallel test suggests 5 factors/components, excellent.
# Model evaluation: to choose a factoring method 'fm', we run the analysis
# with each method and compare the R^2 values to determine which model best
# explains the data.
nfac <- 5
fm <- c("minres", "pa", "wls", "gls", "ml")
faR2Mat <- matrix(NA, nrow = length(fm), ncol = nfac)
for (i in seq_along(fm)) {
Result <- psych::fa(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = fm[i], warnings = T)
faR2Mat[i, ] <- Result[["R2.scores"]]
}
# Average the rows to determine which method explains the data best
rowMeans(faR2Mat)
## [1] 0.8373863 0.8371888 0.8419728 0.8474469 0.8381363
# Additional trials show stable R^2 values. The 'generalized weighted least
# squares' factoring method appears to explain the data best, so we will use
# this model for further exploration.
faBestModel <- psych::fa(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = "gls", warnings = T)
psych::alpha(iBarriers %>% select(-one_of(Categorical)))
##
## Reliability analysis
## Call: psych::alpha(x = iBarriers %>% select(-one_of(Categorical)))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.84 0.84 0.91 0.17 5.3 0.023 2.5 0.55
##
## lower alpha upper 95% confidence boundaries
## 0.8 0.84 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Question4 0.84 0.83 0.90 0.17 5.0 0.025
## Question5 0.84 0.84 0.91 0.17 5.1 0.024
## Question6 0.84 0.84 0.91 0.17 5.3 0.023
## Question7 0.84 0.84 0.91 0.18 5.4 0.023
## Question8 0.84 0.84 0.91 0.17 5.1 0.024
## Question9 0.84 0.84 0.91 0.17 5.2 0.024
## Question10 0.84 0.84 0.91 0.18 5.4 0.023
## Question11 0.84 0.84 0.91 0.17 5.2 0.024
## Question12 0.84 0.84 0.91 0.18 5.3 0.024
## Question13 0.83 0.83 0.90 0.16 4.9 0.025
## Question14 0.83 0.83 0.90 0.16 4.8 0.026
## Question15 0.84 0.84 0.91 0.17 5.2 0.024
## Question16 0.84 0.84 0.91 0.18 5.3 0.023
## Question17 0.84 0.83 0.91 0.17 5.0 0.024
## Question18 0.84 0.84 0.91 0.18 5.3 0.023
## Question19 0.83 0.83 0.91 0.16 4.9 0.025
## Question20 0.84 0.83 0.91 0.17 5.0 0.024
## Question21 0.83 0.83 0.91 0.16 4.9 0.025
## Question22 0.84 0.84 0.91 0.17 5.2 0.024
## Question23 0.84 0.83 0.91 0.17 5.0 0.025
## Question24 0.84 0.84 0.91 0.17 5.1 0.024
## Question25 0.84 0.84 0.91 0.17 5.1 0.024
## Question26 0.83 0.83 0.91 0.17 4.9 0.025
## Question27 0.83 0.83 0.91 0.16 4.8 0.025
## Question28 0.84 0.84 0.91 0.17 5.1 0.024
## Question29 0.83 0.83 0.91 0.16 4.9 0.025
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Question4 90 0.54 0.52 0.51 0.46 3.5 1.52
## Question5 90 0.46 0.42 0.41 0.36 3.1 1.58
## Question6 90 0.32 0.32 0.28 0.23 2.6 1.39
## Question7 90 0.21 0.26 0.22 0.17 1.3 0.66
## Question8 90 0.43 0.44 0.41 0.35 2.3 1.37
## Question9 90 0.34 0.34 0.32 0.27 1.6 1.05
## Question10 90 0.17 0.22 0.17 0.13 1.2 0.49
## Question11 90 0.40 0.38 0.37 0.31 3.4 1.41
## Question12 90 0.27 0.27 0.22 0.21 4.4 0.88
## Question13 90 0.62 0.62 0.62 0.57 2.3 1.10
## Question14 90 0.67 0.67 0.68 0.63 2.4 1.20
## Question15 90 0.42 0.41 0.38 0.34 3.1 1.36
## Question16 90 0.30 0.30 0.27 0.23 2.9 1.15
## Question17 90 0.51 0.51 0.50 0.44 3.5 1.19
## Question18 90 0.28 0.29 0.25 0.20 2.2 1.17
## Question19 90 0.59 0.60 0.59 0.54 2.4 1.07
## Question20 90 0.49 0.49 0.46 0.42 2.6 1.36
## Question21 90 0.58 0.58 0.57 0.52 1.9 1.13
## Question22 90 0.38 0.38 0.35 0.30 3.2 1.21
## Question23 90 0.53 0.53 0.51 0.46 2.5 1.35
## Question24 90 0.44 0.45 0.43 0.36 2.0 1.19
## Question25 90 0.46 0.47 0.45 0.39 2.1 1.25
## Question26 90 0.56 0.56 0.56 0.50 2.2 1.14
## Question27 90 0.65 0.64 0.64 0.59 2.5 1.34
## Question28 90 0.45 0.44 0.41 0.36 2.6 1.38
## Question29 90 0.58 0.58 0.58 0.52 2.5 1.21
# ----------------------- Sun Mar 04 17:47:26 2018 ------------------------#
# TODO(Examine the factors, look at H/T to determine question correlates and see
# if the questions with high weights correspond to Evan's. If not, what do the
# patterns in this analysis suggest about other potential components to explain
# the data?)
Barriers.Qs <- names(df2)[11:38]
loadings2pval <- function(loadings) {
    # Input: loadings from a PCA or FA. Output: labelled two-sided "p-values",
    # computed by z-scoring each column of loadings and taking the normal tail
    # probability. This flags unusually large loadings; it is a heuristic, not
    # a formal significance test.
    nc <- ncol(loadings)
    if (is.null(nc)) {
        nc <- 1
    }
    if (nc > 1) {
        pvals <- loadings %>% unclass %>% apply(2, FUN = scale)
    } else {
        pvals <- loadings %>% unclass %>% scale
    }
    for (i in 1:nc) {
        # After scaling, each column has mean ~0 and sd ~1
        m <- mean(pvals[, i])
        s <- sd(pvals[, i])
        for (r in seq_along(pvals[, i])) {
            pvals[r, i] <- 2 * pnorm(-abs(pvals[r, i]), mean = m, sd = s)
        }
    }
    rownames(pvals) <- rownames(loadings)
    return(pvals)
}
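# Toy check of the helper (illustrative only):
# loadings2pval(matrix(c(0.9, 0.5, 0.1, -0.7), ncol = 1))
# Loadings far from the column mean receive small "p-values".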
pvals.fa <- loadings2pval(faBestModel$loadings)
loadTotals <- as.data.frame(cbind(faBestModel[["Structure"]],
    Sum = rowSums(abs(faBestModel[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
faLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- faBestModel[["loadings"]] %>% unclass %>% .[, paste("GLS", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
faLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.fa[oQues,
paste("GLS", i, sep = "")]) %>% inner_join(loadProps)
}
# ----------------------- Mon Mar 05 07:38:49 2018 ------------------------#
# ML columns provide the proportion of load shared between variables
DT::datatable(faLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(faLoadings[[2]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(faLoadings[[3]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
## Parallel analysis suggests that the number of factors = 3 and the number of components = 2
# The parallel test suggests 3 factors; we will try 2 and 3 to compare with
# Evan's. Model evaluation: to choose a factoring method 'fm', we run the
# analysis with each method and compare the R^2 values to determine which
# model best explains the data.
nfac <- 3
fm <- c("minres", "pa", "wls", "gls", "ml")
faBenR2Mat <- matrix(NA, nrow = length(fm), ncol = nfac)
for (i in seq_along(fm)) {
Result <- psych::fa(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = fm[i], warnings = T)
faBenR2Mat[i, ] <- Result[["R2.scores"]]
}
# Sum the rows to determine which explains the data the best
rowSums(faBenR2Mat)
## [1] 2.781406 2.781063 2.802299 2.857590 2.875187
# Additional trials show stable R^2 values. The 'maximum likelihood' factoring
# method appears to explain the data best, so we will use this model for
# further exploration.
faBenBestModel <- psych::fa(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
max.iter = 10, alpha = 0.1, fm = "ml", warnings = T)
psych::alpha(iBenefits %>% select(-one_of(Categorical)))
##
## Reliability analysis
## Call: psych::alpha(x = iBenefits %>% select(-one_of(Categorical)))
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd
## 0.96 0.96 0.98 0.48 24 0.0056 3.5 0.87
##
## lower alpha upper 95% confidence boundaries
## 0.95 0.96 0.97
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se
## Question30 0.96 0.96 0.98 0.47 23 0.0058
## Question31 0.96 0.96 0.98 0.47 23 0.0058
## Question32 0.96 0.96 0.98 0.47 23 0.0058
## Question33 0.96 0.96 0.98 0.47 23 0.0058
## Question34 0.96 0.96 0.98 0.47 23 0.0058
## Question35 0.96 0.96 0.98 0.48 24 0.0056
## Question36 0.96 0.96 0.98 0.48 24 0.0057
## Question37 0.96 0.96 0.98 0.47 23 0.0059
## Question38 0.96 0.96 0.98 0.47 23 0.0060
## Question39 0.96 0.96 0.98 0.48 24 0.0056
## Question40 0.96 0.96 0.98 0.47 24 0.0058
## Question41 0.96 0.96 0.98 0.47 23 0.0058
## Question42 0.96 0.96 0.98 0.47 23 0.0060
## Question43 0.96 0.96 0.98 0.48 24 0.0057
## Question44 0.96 0.96 0.98 0.48 24 0.0058
## Question45 0.96 0.96 0.98 0.48 24 0.0057
## Question46 0.96 0.96 0.98 0.47 23 0.0060
## Question47 0.96 0.96 0.98 0.48 24 0.0056
## Question48 0.96 0.96 0.98 0.48 24 0.0057
## Question49 0.96 0.96 0.98 0.47 23 0.0058
## Question50 0.96 0.96 0.98 0.48 24 0.0057
## Question51 0.96 0.96 0.98 0.48 24 0.0057
## Question52 0.96 0.96 0.98 0.48 24 0.0058
## Question53 0.96 0.96 0.98 0.48 24 0.0057
## Question54 0.96 0.96 0.98 0.48 24 0.0058
## Question55 0.96 0.96 0.98 0.47 23 0.0058
## Question56 0.96 0.96 0.98 0.49 25 0.0055
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## Question30 106 0.73 0.74 0.74 0.70 3.8 1.16
## Question31 106 0.71 0.72 0.72 0.68 3.7 1.18
## Question32 106 0.75 0.76 0.76 0.73 3.7 1.15
## Question33 106 0.74 0.75 0.74 0.72 3.9 1.17
## Question34 106 0.72 0.72 0.70 0.69 3.1 1.36
## Question35 106 0.55 0.56 0.54 0.52 4.4 0.92
## Question36 106 0.57 0.59 0.57 0.55 4.2 0.97
## Question37 106 0.81 0.82 0.82 0.80 3.5 1.12
## Question38 106 0.85 0.86 0.86 0.84 3.3 1.29
## Question39 106 0.61 0.62 0.61 0.57 3.5 1.24
## Question40 106 0.71 0.71 0.71 0.68 3.2 1.29
## Question41 106 0.77 0.78 0.78 0.75 3.3 1.20
## Question42 106 0.84 0.84 0.84 0.82 3.2 1.42
## Question43 106 0.69 0.69 0.69 0.66 4.1 1.22
## Question44 106 0.70 0.70 0.70 0.68 4.1 1.22
## Question45 106 0.70 0.70 0.70 0.67 4.0 1.29
## Question46 106 0.82 0.81 0.81 0.80 3.3 1.43
## Question47 106 0.60 0.59 0.57 0.56 2.5 1.55
## Question48 106 0.67 0.66 0.65 0.64 2.2 1.23
## Question49 106 0.75 0.74 0.73 0.72 3.2 1.31
## Question50 106 0.67 0.66 0.66 0.64 3.8 1.27
## Question51 106 0.70 0.69 0.69 0.67 3.7 1.29
## Question52 106 0.72 0.71 0.71 0.69 3.8 1.22
## Question53 106 0.63 0.63 0.61 0.60 3.7 1.16
## Question54 106 0.71 0.71 0.70 0.68 3.3 1.24
## Question55 106 0.74 0.74 0.74 0.72 2.8 1.24
## Question56 106 0.51 0.51 0.49 0.47 2.9 1.27
# ----------------------- Mon Mar 05 08:52:05 2018 ------------------------#
# Calculate benefits
pvals.fa <- loadings2pval(faBenBestModel$loadings)
loadTotals <- as.data.frame(cbind(faBenBestModel[["Structure"]],
    Sum = rowSums(abs(faBenBestModel[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
faBenLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- faBenBestModel[["loadings"]] %>% unclass %>% .[, paste("ML", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
faBenLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.fa[oQues,
paste("ML", i, sep = "")]) %>% inner_join(loadProps)
}
DT::datatable(faBenLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
rotations <- c("none", "varimax", "quartimax", "promax", "oblimin", "simplimax",
"cluster")
nfac <- 5
rms <- vector()
for (i in seq_along(rotations)) {
Result <- psych::pca(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = rotations[i])
rms[i] <- Result[["rms"]]
}
# ----------------------- Mon Mar 05 10:39:55 2018 ------------------------#
# All rotations return the same root mean squared error, so 'none' will be
# used for the rotation parameter
pcaBar <- psych::pca(iBarriers %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = "none")
pvals.pca <- loadings2pval(pcaBar$loadings)
loadTotals <- as.data.frame(cbind(pcaBar[["Structure"]],
    Sum = rowSums(abs(pcaBar[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
pcaBarLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- pcaBar[["loadings"]] %>% unclass %>% .[, paste("PC", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
pcaBarLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.pca[oQues,
paste("PC", i, sep = "")]) %>% inner_join(loadProps)
}
DT::datatable(pcaBarLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(pcaBarLoadings[[2]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
DT::datatable(pcaBarLoadings[[3]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
rotations <- c("none", "varimax", "quartimax", "promax", "oblimin", "simplimax",
"cluster")
rms <- vector()
nfac <- 2
for (i in seq_along(rotations)) {
Result <- psych::pca(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = rotations[i])
rms[i] <- Result[["rms"]]
}
# ----------------------- Mon Mar 05 10:39:55 2018 ------------------------#
# All rotations return the same root mean squared error, so 'none' will be
# used for the rotation parameter
pcaBen <- psych::pca(iBenefits %>% select(-one_of(Categorical)), nfactors = nfac,
rotate = "none")
pvals.pca <- loadings2pval(pcaBen$loadings)
loadTotals <- as.data.frame(cbind(pcaBen[["Structure"]],
    Sum = rowSums(abs(pcaBen[["Structure"]]))))
loadProps <- loadTotals %>% mutate_at(vars(c(1:nfac)), function(x) {
abs(x)/loadTotals$Sum
}) %>% cbind(oQues = rownames(loadTotals))
pcaBenLoadings <- vector("list", nfac)
for (i in 1:nfac) {
l <- pcaBen[["loadings"]] %>% unclass %>% .[, paste("PC", i, sep = "")]
h <- head(l[order(l, decreasing = T)], 5)
t <- tail(l[order(l, decreasing = T)], 5)
pcaBenLoadings[[i]] <- data.frame(oQues = c(names(h), names(t)), Loadings = c(h,
t)) %>% inner_join(qKey, by = c(oQues = "Short")) %>% mutate(pVal = pvals.pca[oQues,
paste("PC", i, sep = "")]) %>% inner_join(loadProps)
}
DT::datatable(pcaBenLoadings[[1]]) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
library(caret)
# createFolds expects the outcome vector, so fold on the Diet column
trControl <- caret::trainControl(method = "repeatedcv", number = 4,
    index = caret::createFolds(Barriers$Diet, k = 10), repeats = 4, p = 0.75,
    allowParallel = T)
library(doParallel)
# make a PSOCK cluster, leaving two cores free (six workers on this machine)
cl <- makeCluster(detectCores() - 2, type = "PSOCK")
# register the number of parallel workers
registerDoParallel(cl)
# return number of parallel workers
getDoParWorkers()
## [1] 6
iBarriers.v <- iBarriers %>% filter(Diet != "Vegan")
iBarriers.v$Diet <- factor(iBarriers.v$Diet)  # drop the now-empty Vegan level
best.mlm <- caret::train(Diet ~ . - Interest_Veganism, data = iBarriers.v, method = "multinom",
trControl = trControl)
## # weights: 144 (105 variable)
## initial value 124.766493
## iter 10 value 47.740691
## iter 20 value 27.363454
## iter 30 value 21.431328
## iter 40 value 20.791093
## iter 50 value 20.548323
## iter 60 value 20.481806
## iter 70 value 20.476857
## final value 20.476767
## converged
stopCluster(cl)
# ----------------------- Tue Mar 06 11:41:56 2018 ------------------------#
# Test accuracy
caret::confusionMatrix(predict(best.mlm), iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 44 0 0 0
## Flexitarian 0 24 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9598, 1)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4889 0.2667 0.1333
## Detection Prevalence 0.4889 0.2667 0.1333
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
# 100% accurate. The model is likely overfitted to the training data, but it
# will still yield interesting insight into the factors that predict
# preference.
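# A less optimistic check (a sketch, assuming the caret object above): inspect
# the resampled accuracy rather than the training-set confusion matrix.
# best.mlm$results %>% arrange(desc(Accuracy)) %>% head()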
# ----------------------- Tue Mar 06 08:45:06 2018 ------------------------#
# Display coefficients on both models
best.mlm.coef <- coef(best.mlm$finalModel) %>% as.matrix()
best.mlm.coef <- as.data.frame(best.mlm.coef, stringsAsFactors = F) %>%
    rownames_to_column("Feat") %>% gather(key = "rowname", value = "Coef",
    -Feat) %>% mutate(pVal = as.vector(loadings2pval(Coef))) %>% arrange(pVal) %>%
    inner_join(qKey, by = c(rowname = "Short"))
DT::datatable(best.mlm.coef) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
## # weights: 148 (108 variable)
## initial value 124.766493
## iter 10 value 44.963112
## iter 20 value 14.042768
## iter 30 value 0.411924
## iter 40 value 0.002488
## final value 0.000081
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question5
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question12
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Gender
## trying - Age_Cohort
## # weights: 144 (105 variable)
## initial value 124.766493
## iter 10 value 46.188223
## iter 20 value 19.490316
## iter 30 value 1.564995
## iter 40 value 0.005630
## final value 0.000050
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question12
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Gender
## trying - Age_Cohort
## trying + Question5
## # weights: 140 (102 variable)
## initial value 124.766493
## iter 10 value 46.450451
## iter 20 value 16.214609
## iter 30 value 0.547619
## iter 40 value 0.006369
## final value 0.000051
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question12
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Gender
## # weights: 136 (99 variable)
## initial value 124.766493
## iter 10 value 47.663897
## iter 20 value 19.909807
## iter 30 value 1.284903
## iter 40 value 0.015059
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question9
## trying - Question10
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question12
## trying + Gender
## # weights: 132 (96 variable)
## initial value 124.766493
## iter 10 value 48.201173
## iter 20 value 21.442639
## iter 30 value 3.347825
## iter 40 value 0.037137
## iter 50 value 0.001299
## final value 0.000051
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question10
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - SJA
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question12
## trying + Gender
## # weights: 128 (93 variable)
## initial value 124.766493
## iter 10 value 41.753508
## iter 20 value 20.435798
## iter 30 value 5.015973
## iter 40 value 0.061489
## iter 50 value 0.000103
## iter 50 value 0.000053
## iter 50 value 0.000053
## final value 0.000053
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question10
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question12
## trying + SJA
## trying + Gender
## # weights: 124 (90 variable)
## initial value 124.766493
## iter 10 value 41.668516
## iter 20 value 21.060753
## iter 30 value 6.647382
## iter 40 value 0.128429
## iter 50 value 0.000198
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question13
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + SJA
## trying + Gender
## # weights: 120 (87 variable)
## initial value 124.766493
## iter 10 value 43.195177
## iter 20 value 23.408919
## iter 30 value 11.047688
## iter 40 value 0.897889
## iter 50 value 0.001465
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question23
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + SJA
## trying + Gender
## # weights: 116 (84 variable)
## initial value 124.766493
## iter 10 value 44.014714
## iter 20 value 28.145918
## iter 30 value 14.241176
## iter 40 value 3.262813
## iter 50 value 0.004956
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Animal_Lover
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question23
## trying + SJA
## trying + Gender
## # weights: 112 (81 variable)
## initial value 124.766493
## iter 10 value 45.247957
## iter 20 value 24.673261
## iter 30 value 11.290692
## iter 40 value 0.327068
## iter 50 value 0.000602
## final value 0.000054
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question17
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question23
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 108 (78 variable)
## initial value 124.766493
## iter 10 value 46.608951
## iter 20 value 25.923687
## iter 30 value 13.895202
## iter 40 value 0.711476
## iter 50 value 0.002131
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question24
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question23
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 104 (75 variable)
## initial value 124.766493
## iter 10 value 47.731850
## iter 20 value 27.655145
## iter 30 value 15.359877
## iter 40 value 1.671001
## iter 50 value 0.003140
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question22
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 100 (72 variable)
## initial value 124.766493
## iter 10 value 49.616446
## iter 20 value 29.557102
## iter 30 value 16.241720
## iter 40 value 7.972317
## iter 50 value 0.099680
## iter 60 value 0.000101
## iter 60 value 0.000051
## iter 60 value 0.000051
## final value 0.000051
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question18
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 96 (69 variable)
## initial value 124.766493
## iter 10 value 49.977540
## iter 20 value 31.940686
## iter 30 value 17.790865
## iter 40 value 9.854907
## iter 50 value 3.111797
## iter 60 value 0.002994
## final value 0.000052
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question14
## trying - Question15
## trying - Question16
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 92 (66 variable)
## initial value 124.766493
## iter 10 value 52.189172
## iter 20 value 34.274533
## iter 30 value 20.520691
## iter 40 value 10.193880
## iter 50 value 0.362358
## iter 60 value 0.001199
## final value 0.000054
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question16
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 88 (63 variable)
## initial value 124.766493
## iter 10 value 56.558441
## iter 20 value 34.978416
## iter 30 value 23.614229
## iter 40 value 13.036827
## iter 50 value 6.533934
## iter 60 value 0.042223
## final value 0.000053
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying - Age_Cohort
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question16
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## # weights: 84 (60 variable)
## initial value 124.766493
## iter 10 value 58.509776
## iter 20 value 34.602615
## iter 30 value 21.377537
## iter 40 value 14.179416
## iter 50 value 10.198975
## iter 60 value 1.358393
## iter 70 value 0.001723
## final value 0.000055
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Question29
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question16
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## trying + Age_Cohort
## # weights: 80 (57 variable)
## initial value 124.766493
## iter 10 value 47.792443
## iter 20 value 34.392573
## iter 30 value 22.720912
## iter 40 value 15.345937
## iter 50 value 11.160363
## iter 60 value 9.751084
## iter 70 value 8.681686
## iter 80 value 0.435872
## iter 90 value 0.000449
## final value 0.000057
## converged
## trying - Interest_Veganism
## trying - Question4
## trying - Question6
## trying - Question7
## trying - Question8
## trying - Question11
## trying - Question15
## trying - Question19
## trying - Question20
## trying - Question21
## trying - Question25
## trying - Question26
## trying - Question27
## trying - Question28
## trying - Environmentalist
## trying - Feminist
## trying - Group
## trying - Race
## trying + Question5
## trying + Question9
## trying + Question10
## trying + Question12
## trying + Question13
## trying + Question14
## trying + Question16
## trying + Question17
## trying + Question18
## trying + Question22
## trying + Question23
## trying + Question24
## trying + Question29
## trying + SJA
## trying + Animal_Lover
## trying + Gender
## trying + Age_Cohort
## Call:
## multinom(formula = Diet ~ Interest_Veganism + Question4 + Question6 +
## Question7 + Question8 + Question11 + Question15 + Question19 +
## Question20 + Question21 + Question25 + Question26 + Question27 +
## Question28 + Environmentalist + Feminist + Group + Race,
## data = iBarriers.v)
##
## Coefficients:
## (Intercept) Interest_Veganism Question4 Question6 Question7
## Flexitarian 13060.2024 6289.771 1443.4186 -2852.8008 3237.1035
## Pescetarian 434.8581 2381.473 681.0775 -5771.9469 10085.1758
## Vegetarian 46979.4959 3462.682 -418.2598 -520.6968 186.5502
## Question8 Question11 Question15 Question19 Question20
## Flexitarian -1193.0348 -11106.79 2845.401 -1502.903 -1908.452
## Pescetarian -1682.6864 -23510.77 2187.259 4728.274 4988.434
## Vegetarian -895.8328 -18712.46 -1537.910 -4664.936 3991.689
## Question21 Question25 Question26 Question27 Question28
## Flexitarian 1529.052 -3508.986 913.8195 -586.3496 -2080.202
## Pescetarian 2256.248 -10978.140 -2556.0877 5574.8685 -1242.906
## Vegetarian 1988.132 -4558.468 -7656.9615 7597.3174 -2564.746
## Environmentalist Feminist Group Race
## Flexitarian 4232.887 882.14868 -166.9211 3666.805
## Pescetarian 15235.348 -6948.87762 225.7232 8506.220
## Vegetarian 3429.519 27.00841 1449.7197 -264.037
##
## Std. Errors:
## (Intercept) Interest_Veganism Question4 Question6
## Flexitarian 2.505995e+03 1.051699e+03 3.022355e+03 6.141512e+03
## Pescetarian 1.807246e-09 1.807246e-09 1.807246e-09 1.807246e-09
## Vegetarian 2.494757e-139 4.989515e-139 7.484272e-139 4.989515e-139
## Question7 Question8 Question11 Question15
## Flexitarian 7.947033e+03 4.744712e+03 3.033184e+03 1.462425e+03
## Pescetarian 1.807246e-09 5.421739e-09 5.421739e-09 9.036232e-09
## Vegetarian 7.484272e-139 9.979029e-139 4.989515e-139 4.989515e-139
## Question19 Question20 Question21 Question25
## Flexitarian 1.292479e+04 5.015072e+03 3.331534e+03 1.356068e+03
## Pescetarian 1.807246e-09 1.807246e-09 3.614493e-09 3.614493e-09
## Vegetarian 7.484272e-139 1.247379e-138 2.494757e-139 1.247379e-138
## Question26 Question27 Question28 Environmentalist
## Flexitarian 5.257355e+03 5.127986e+03 3.099804e+03 2.338078e+03
## Pescetarian 1.807246e-09 1.807246e-09 1.380529e-09 9.036232e-09
## Vegetarian 2.494757e-139 4.989515e-139 4.989515e-139 1.247379e-138
## Feminist Group Race
## Flexitarian 3.722167e+03 1.913628e+03 3.613079e+03
## Pescetarian 1.807246e-09 1.807246e-09 1.807246e-09
## Vegetarian 1.247379e-138 1.247379e-138 2.494757e-139
##
## Residual Deviance: 0.0001148672
## AIC: 114.0001
best.mlm.step <- nnet::multinom(formula = Diet ~ Question5 + Question7 + Question8 +
    Question9 + Question11 + Question13 + Question16 + Question18 + Question21 +
    Question22 + Question24 + Question25 + Question28 + Question29 + Environmentalist +
    Animal_Lover + Group + Race + Gender + Age_Cohort, data = iBarriers.v)
## # weights: 88 (63 variable)
## initial value 124.766493
## iter 10 value 53.573126
## iter 20 value 32.864539
## iter 30 value 22.164009
## iter 40 value 14.860347
## iter 50 value 13.069672
## iter 60 value 12.092106
## iter 70 value 10.805683
## iter 80 value 9.979608
## iter 90 value 9.674509
## iter 100 value 9.635162
## final value 9.635162
## stopped after 100 iterations
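# The confusion matrix below lacks its generating call; presumably (assumed):
# caret::confusionMatrix(predict(best.mlm.step, iBarriers.v), iBarriers.v$Diet)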
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 42 1 0 0
## Flexitarian 2 23 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.9057, 0.9931)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.9497
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9545 0.9583 1.0000
## Specificity 0.9783 0.9697 1.0000
## Pos Pred Value 0.9767 0.9200 1.0000
## Neg Pred Value 0.9574 0.9846 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4667 0.2556 0.1333
## Detection Prevalence 0.4778 0.2778 0.1333
## Balanced Accuracy 0.9664 0.9640 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
Note: This runs fine in R but I can’t get it to print to HTML properly, so I’ll have to do a workaround: run it once, save the results, and load them below.
# library(caret)
# trControl <- caret::trainControl(method = "repeatedcv", number = 4,
#     index = caret::createFolds(iBenefits$Diet, k = 10), repeats = 4, p = 0.75,
#     allowParallel = T, verboseIter = T)
# library(doParallel)
# # make a PSOCK cluster, leaving two cores free
# cl <- makeCluster(detectCores() - 2, type = "PSOCK")
# # register the number of parallel workers
# registerDoParallel(cl)
# # return number of parallel workers
# getDoParWorkers()
# best.mlm.ben <- caret::train(Diet ~ . - Interest_Veganism, data = iBenefits,
#     method = "multinom", trControl = trControl)
# stopCluster(cl)
# # ----------------------- Tue Mar 06 11:50 2018 ------------------------#
# # Test Accuracy
# best.mlm.ben.acc <- caret::confusionMatrix(predict(best.mlm.ben), iBenefits$Diet)
# # 100% accurate. Model is likely overfitted to training data, but this will
# # lead to interesting insight about the factors that predict preference.
# # ----------------------- Tue Mar 06 08:45:06 2018 ------------------------#
# # Display coefficients on both models
# best.mlm.ben.coef <- coef(best.mlm.ben$finalModel) %>% as.matrix()
# best.mlm.ben.coef <- as.data.frame(best.mlm.ben.coef, stringsAsFactors = F) %>%
#     rownames_to_column("Feat") %>% gather(key = "rowname", value = "Coef", -Feat) %>%
#     mutate(pVal = as.vector(loadings2pval(Coef))) %>% arrange(pVal) %>%
#     inner_join(qKey, by = c(rowname = "Short"))
# save(best.mlm.ben.acc, best.mlm.ben.coef, best.mlm.ben.step, file = "MLRbenefits.Rdata")
load(file = "MLRbenefits.Rdata")
# ----------------------- Tue Mar 06 12:20:43 2018 ------------------------#
# Accuracy of Model
best.mlm.ben.acc
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 44 0 0 0 0
## Flexitarian 0 24 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 16
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9658, 1)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.4151 0.2264 0.1132
## Detection Rate 0.4151 0.2264 0.1132
## Detection Prevalence 0.4151 0.2264 0.1132
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09434 0.1509
## Detection Rate 0.09434 0.1509
## Detection Prevalence 0.09434 0.1509
## Balanced Accuracy 1.00000 1.0000
# Coefficients and their statistical significance
DT::datatable(best.mlm.ben.coef) %>% formatStyle("pVal", backgroundColor = styleInterval(0.1,
c("lightgreen", "white")))
# ----------------------- Tue Mar 06 14:33:34 2018 ------------------------#
# Stepwise feature selection
mlm.ben <- nnet::multinom(Diet ~ ., data = iBenefits)
## # weights: 190 (148 variable)
## initial value 168.990981
## iter 10 value 52.848570
## iter 20 value 30.160739
## iter 30 value 15.988447
## iter 40 value 8.408688
## iter 50 value 0.039870
## final value 0.000062
## converged
# summary(step.mlm.ben <- step(mlm.ben, direction = "both", trace = 0))
# This errors, so I will just load the saved result instead.
best.mlm.ben.step <- nnet::multinom(formula = Diet ~ Interest_Veganism + Question30 +
Question31 + Question32 + Question33 + Question35 + Question36 + Question37 +
Question41 + Question42 + Question44 + Question45 + Question46 + Question47 +
Question51 + Question52 + Question53 + Question54 + Question55 + Question56 +
Environmentalist + Feminist + Animal_Lover + Group + Age_Cohort, data = iBenefits)
## # weights: 135 (104 variable)
## initial value 170.600419
## iter 10 value 67.165375
## iter 20 value 47.376452
## iter 30 value 32.603881
## iter 40 value 23.414883
## iter 50 value 21.444379
## iter 60 value 20.148754
## iter 70 value 19.144439
## iter 80 value 18.952579
## iter 90 value 18.940168
## iter 100 value 18.939265
## final value 18.939265
## stopped after 100 iterations
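# As above, the confusion matrix below lacks its generating call; presumably
# (assumed):
# caret::confusionMatrix(predict(best.mlm.ben.step, iBenefits), iBenefits$Diet)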
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 40 4 0 0 0
## Flexitarian 4 20 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 16
##
## Overall Statistics
##
## Accuracy : 0.9245
## 95% CI : (0.8567, 0.9669)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8969
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.8333 1.0000
## Specificity 0.9355 0.9512 1.0000
## Pos Pred Value 0.9091 0.8333 1.0000
## Neg Pred Value 0.9355 0.9512 1.0000
## Prevalence 0.4151 0.2264 0.1132
## Detection Rate 0.3774 0.1887 0.1132
## Detection Prevalence 0.4151 0.2264 0.1132
## Balanced Accuracy 0.9223 0.8923 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09434 0.1509
## Detection Rate 0.09434 0.1509
## Detection Prevalence 0.09434 0.1509
## Balanced Accuracy 1.00000 1.0000
ben.step.vars <- c("Interest_Veganism", "Question30", "Question31", "Question32",
"Question33", "Question35", "Question36", "Question37", "Question41", "Question42",
"Question44", "Question45", "Question46", "Question47", "Question51", "Question52",
"Question53", "Question54", "Question55", "Question56", "Environmentalist", "Feminist",
"Animal_Lover", "Group", "Age_Cohort")
# ----------------------- Tue Mar 06 14:54:58 2018 ------------------------#
# These questions should have the best predictive value for Diet, grouped by
# whether they act as a benefit or a barrier
(BarQ <- union(bar.pval.vars, bar.step.vars))
## [1] "Question11" "Environmentalist" "Age_Cohort"
## [4] "Question13" "Question22" "Question7"
## [7] "Question17" "Question25" "Question12"
## [10] "Question24" "SJA" "Question5"
## [13] "Question8" "Question9" "Question16"
## [16] "Question18" "Question21" "Question28"
## [19] "Question29" "Animal_Lover" "Group"
## [22] "Race" "Gender"
## [1] "Question30" "Question38" "Question55"
## [4] "Question52" "Question31" "Question45"
## [7] "Question50" "Race" "Question32"
## [10] "Question43" "Interest_Veganism" "Question33"
## [13] "Question35" "Question36" "Question37"
## [16] "Question41" "Question42" "Question44"
## [19] "Question46" "Question47" "Question51"
## [22] "Question53" "Question54" "Question56"
## [25] "Environmentalist" "Feminist" "Animal_Lover"
## [28] "Group" "Age_Cohort"
# ----------------------- Wed Mar 07 18:03:15 2018 ------------------------#
# Barriers
iBarC <- iBarriers %>% select(one_of(BarC.key$Short))
iBarH <- iBarriers %>% select(one_of(BarH.key$Short))
iBarK <- iBarriers %>% select(one_of(BarK.key$Short))
iBarN <- iBarriers %>% select(one_of(BarN.key$Short))
iBarP <- iBarriers %>% select(one_of(BarP.key$Short))
Bar.scales <- list(Convenience = iBarC, Health = iBarH, Knowledge = iBarK, Normative = iBarN,
Preference = iBarP)
Bar.groups <- list(1:4, 5:8, 9:11, 12:15, 16:20)
names(Bar.groups) <- names(Bar.scales)
Bar.all <- do.call(cbind, Bar.scales)
psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Bar.groups, item.labels = colnames(Bar.all)),
items = Bar.all)
## Call: psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Bar.groups,
## item.labels = colnames(Bar.all)), items = Bar.all)
##
## (Unstandardized) Alpha:
## Convenience Health Knowledge Normative Preference
## alpha 0.71 0.84 0.76 0.63 0.72
##
## Standard errors of unstandardized Alpha:
## Convenience Health Knowledge Normative Preference
## ASE 0.089 0.07 0.1 0.097 0.076
##
## Average item correlation:
## Convenience Health Knowledge Normative Preference
## average.r 0.38 0.56 0.51 0.3 0.35
##
## Guttman 6* reliability:
## Convenience Health Knowledge Normative Preference
## Lambda.6 0.74 0.86 0.75 0.73 0.81
##
## Signal/Noise based upon av.r :
## Convenience Health Knowledge Normative Preference
## Signal/Noise 2.4 5.1 3.1 1.7 2.6
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## Convenience Health Knowledge Normative Preference
## Convenience 0.71 0.29 0.61 0.27 0.39
## Health 0.22 0.84 0.36 0.31 0.56
## Knowledge 0.44 0.29 0.76 0.20 0.23
## Normative 0.18 0.23 0.14 0.63 0.49
## Preference 0.28 0.43 0.17 0.33 0.72
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
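# A minimal sketch (hypothetical name): keeping the scoreItems result rather
# than only printing it exposes the per-respondent scale scores and the
# reliabilities for reuse downstream.
Bar.scored <- psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Bar.groups,
    item.labels = colnames(Bar.all)), items = Bar.all)
Bar.scored$alpha  # unstandardized alpha per scale, as printed above
head(Bar.scored$scores)  # each respondent's mean response on each barrier scale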
# ----------------------- Wed Mar 07 18:08:58 2018 ------------------------#
# Benefits
iBenH <- iBenefits %>% select(one_of(BenH.key$Short))
iBenME <- iBenefits %>% select(one_of(BenME.key$Short))
Ben.scales <- list(Health = iBenH, MoralEnv = iBenME)
lapply(Ben.scales, length)
## $Health
## [1] 11
##
## $MoralEnv
## [1] 9
Ben.groups <- list(Health = 1:11, MoralEnv = 12:20)
Ben.all <- do.call(cbind, Ben.scales)
psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Ben.groups, item.labels = colnames(Ben.all)),
items = Ben.all)
## Call: psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Ben.groups,
## item.labels = colnames(Ben.all)), items = Ben.all)
##
## (Unstandardized) Alpha:
## Health MoralEnv
## alpha 0.95 0.95
##
## Standard errors of unstandardized Alpha:
## Health MoralEnv
## ASE 0.021 0.024
##
## Average item correlation:
## Health MoralEnv
## average.r 0.61 0.67
##
## Guttman 6* reliability:
## Health MoralEnv
## Lambda.6 0.96 0.97
##
## Signal/Noise based upon av.r :
## Health MoralEnv
## Signal/Noise 17 18
##
## Scale intercorrelations corrected for attenuation
## raw correlations below the diagonal, alpha on the diagonal
## corrected correlations above the diagonal:
## Health MoralEnv
## Health 0.95 0.60
## MoralEnv 0.56 0.95
##
## In order to see the item by scale loadings and frequency counts of the data
## print with the short option = FALSE
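# Sketch (hypothetical names): bind the two benefit scale scores onto
# iBenefits so they can stand in for the 20 raw items as compact predictors.
Ben.scored <- psych::scoreItems(keys = psych::make.keys(nvars = 20, keys.list = Ben.groups,
    item.labels = colnames(Ben.all)), items = Ben.all)
iBenefits.scored <- cbind(iBenefits, Ben.scored$scores)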
# ----------------------- Wed Mar 07 20:51:29 2018 ------------------------#
# Compare the scale questions with those with predictive power from the MLR
scale.qs <- unlist(c(lapply(Ben.scales, names), lapply(Bar.scales, names)))
all(scale.qs %in% OptQ[["All"]]$Short)
## [1] FALSE
# View the questions that are not in the feature selection set
scale.qs[!scale.qs %in% OptQ[["All"]]$Short]
## Health5 Health10 MoralEnv5 Convenience2 Convenience4
## "Question34" "Question40" "Question49" "Question19" "Question20"
## Health1 Health2 Knowledge3 Normative3 Normative4
## "Question26" "Question27" "Question23" "Question4" "Question6"
## Preference2 Preference4
## "Question15" "Question14"
## Diet~+Question17+Question19+Question18+Question20+Question26+Question27+Question28+Question29+Question24+Question25+Question23+Question5+Question9+Question4+Question6+Question11+Question15+Question13+Question14+Question12
Evan.bar.mlr <- nnet::multinom(Diet ~ Interest_Veganism + Question17 + Question19 +
Question18 + Question20 + Question26 + Question27 + Question28 + Question29 +
Question24 + Question25 + Question23 + Question5 + Question9 + Question4 + Question6 +
Question11 + Question15 + Question13 + Question14 + Question12, data = iBarriers)
## # weights: 92 (66 variable)
## initial value 124.766493
## iter 10 value 63.311608
## iter 20 value 40.779788
## iter 30 value 26.626715
## iter 40 value 19.804762
## iter 50 value 18.446852
## iter 60 value 17.959982
## iter 70 value 17.679003
## iter 80 value 17.621367
## iter 90 value 17.613792
## iter 100 value 17.613381
## final value 17.613381
## stopped after 100 iterations
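# The log above stops at nnet's default of 100 iterations without reporting
# convergence. A sketch (hypothetical name): update() refits with a higher
# iteration cap so the likelihood actually converges before we predict.
Evan.bar.mlr.conv <- update(Evan.bar.mlr, maxit = 500, trace = FALSE)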
Evan.bar.mlr.pred <- predict(Evan.bar.mlr, iBarriers)
caret::confusionMatrix(Evan.bar.mlr.pred, iBarriers$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 41 4 0 0 0
## Flexitarian 3 20 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9222
## 95% CI : (0.8463, 0.9682)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8817
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9318 0.8333 1.0000
## Specificity 0.9130 0.9545 1.0000
## Pos Pred Value 0.9111 0.8696 1.0000
## Neg Pred Value 0.9333 0.9403 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4556 0.2222 0.1333
## Detection Prevalence 0.5000 0.2556 0.1333
## Balanced Accuracy 0.9224 0.8939 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.0000 NA
## Specificity 1.0000 1
## Pos Pred Value 1.0000 NA
## Neg Pred Value 1.0000 NA
## Prevalence 0.1111 0
## Detection Rate 0.1111 0
## Detection Prevalence 0.1111 0
## Balanced Accuracy 1.0000 NA
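# Note that this accuracy is computed on the training data itself. A hedged
# sketch of a cross-validated estimate via caret (illustrative, not from the
# original run); method = "multinom" also tunes the weight-decay penalty.
set.seed(1)
bar.cv <- caret::train(formula(terms(Evan.bar.mlr)), data = iBarriers,
    method = "multinom", trControl = caret::trainControl(method = "cv", number = 5),
    trace = FALSE)
bar.cv$results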
Evan.bar.means <- iBarriers.v %>% select(one_of(c("Diet", "Interest_Veganism", (lapply(Bar.scales,
names) %>% unlist)))) %>% group_by(Diet) %>% summarise_all(funs(mean))
Evan.bar.means.mlr <- nnet::multinom(Diet ~ ., data = Evan.bar.means)
## # weights: 92 (66 variable)
## initial value 5.545177
## iter 10 value 0.068637
## final value 0.000070
## converged
Evan.bar.means.mlr.pred <- predict(Evan.bar.means.mlr, iBarriers.v)
caret::confusionMatrix(Evan.bar.means.mlr.pred, iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 19 4 0 0
## Flexitarian 16 14 3 4
## Pescetarian 4 5 6 1
## Vegetarian 5 1 3 5
##
## Overall Statistics
##
## Accuracy : 0.4889
## 95% CI : (0.382, 0.5965)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : 0.541638
##
## Kappa : 0.2945
## Mcnemar's Test P-Value : 0.003397
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.4318 0.5833 0.50000
## Specificity 0.9130 0.6515 0.87179
## Pos Pred Value 0.8261 0.3784 0.37500
## Neg Pred Value 0.6269 0.8113 0.91892
## Prevalence 0.4889 0.2667 0.13333
## Detection Rate 0.2111 0.1556 0.06667
## Detection Prevalence 0.2556 0.4111 0.17778
## Balanced Accuracy 0.6724 0.6174 0.68590
## Class: Vegetarian
## Sensitivity 0.50000
## Specificity 0.88750
## Pos Pred Value 0.35714
## Neg Pred Value 0.93421
## Prevalence 0.11111
## Detection Rate 0.05556
## Detection Prevalence 0.15556
## Balanced Accuracy 0.69375
Evan.bar.means <- iBarriers.v %>% select(one_of(c("Diet", "Interest_Veganism", names(iBarriers.v)[29:36],
(lapply(Bar.scales, names) %>% unlist)))) %>% group_by(Diet) %>% summarise_all(funs(mean))
Evan.bar.means.mlr <- nnet::multinom(Diet ~ ., data = Evan.bar.means)
## # weights: 124 (90 variable)
## initial value 5.545177
## iter 10 value 0.018203
## iter 20 value 0.000103
## iter 20 value 0.000061
## iter 20 value 0.000059
## final value 0.000059
## converged
Evan.bar.means.mlr.pred <- predict(Evan.bar.means.mlr, iBarriers.v)
caret::confusionMatrix(Evan.bar.means.mlr.pred, iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 26 3 1 1
## Flexitarian 15 18 2 4
## Pescetarian 1 1 8 0
## Vegetarian 2 2 1 5
##
## Overall Statistics
##
## Accuracy : 0.6333
## 95% CI : (0.5251, 0.7325)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : 0.004073
##
## Kappa : 0.4677
## Mcnemar's Test P-Value : 0.111298
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.5909 0.7500 0.66667
## Specificity 0.8913 0.6818 0.97436
## Pos Pred Value 0.8387 0.4615 0.80000
## Neg Pred Value 0.6949 0.8824 0.95000
## Prevalence 0.4889 0.2667 0.13333
## Detection Rate 0.2889 0.2000 0.08889
## Detection Prevalence 0.3444 0.4333 0.11111
## Balanced Accuracy 0.7411 0.7159 0.82051
## Class: Vegetarian
## Sensitivity 0.50000
## Specificity 0.93750
## Pos Pred Value 0.50000
## Neg Pred Value 0.93750
## Prevalence 0.11111
## Detection Rate 0.05556
## Detection Prevalence 0.11111
## Balanced Accuracy 0.71875
Evan.bar.items <- iBarriers.v %>% select(one_of(c("Diet", "Interest_Veganism", names(iBarriers.v)[29:36],
(lapply(Bar.scales, names) %>% unlist))))
Evan.bar.items.mlr <- nnet::multinom(Diet ~ ., data = Evan.bar.items)
## # weights: 124 (90 variable)
## initial value 124.766493
## iter 10 value 51.953585
## iter 20 value 22.062162
## iter 30 value 6.221790
## iter 40 value 0.010267
## final value 0.000052
## converged
Evan.bar.items.mlr.pred <- predict(Evan.bar.items.mlr, iBarriers.v)
caret::confusionMatrix(Evan.bar.items.mlr.pred, iBarriers.v$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 44 0 0 0
## Flexitarian 0 24 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.9598, 1)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4889 0.2667 0.1333
## Detection Prevalence 0.4889 0.2667 0.1333
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
# With 90 free parameters fit to just 90 observations, perfect training
# accuracy is unsurprising. Can variables be eliminated while retaining the
# accuracy?
# Evan.bar.items.step <- step(Evan.bar.items.mlr, direction = 'both', trace = F)
# Evan.bar.items.step %>% summary
Evan.bar.items.step.mlr <- nnet::multinom(formula = Diet ~ Environmentalist + Feminist +
SJA + Group + Age_Cohort + Question17 + Question20 + Question28 + Question25 +
Question9 + Question4 + Question11 + Question13 + Question14, data = Evan.bar.items)
## # weights: 64 (45 variable)
## initial value 124.766493
## iter 10 value 56.269174
## iter 20 value 39.080114
## iter 30 value 32.373620
## iter 40 value 22.980824
## iter 50 value 19.151602
## iter 60 value 12.809248
## iter 70 value 12.326466
## iter 80 value 12.089918
## iter 90 value 12.066900
## iter 100 value 12.066540
## final value 12.066540
## stopped after 100 iterations
Evan.bar.items.step.mlr.pred <- predict(Evan.bar.items.step.mlr, Evan.bar.items)
caret::confusionMatrix(Evan.bar.items.step.mlr.pred, Evan.bar.items$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian
## Omnivore 40 5 0 0
## Flexitarian 4 19 0 0
## Pescetarian 0 0 12 0
## Vegetarian 0 0 0 10
##
## Overall Statistics
##
## Accuracy : 0.9
## 95% CI : (0.8186, 0.9532)
## No Information Rate : 0.4889
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8479
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.7917 1.0000
## Specificity 0.8913 0.9394 1.0000
## Pos Pred Value 0.8889 0.8261 1.0000
## Neg Pred Value 0.9111 0.9254 1.0000
## Prevalence 0.4889 0.2667 0.1333
## Detection Rate 0.4444 0.2111 0.1333
## Detection Prevalence 0.5000 0.2556 0.1333
## Balanced Accuracy 0.9002 0.8655 1.0000
## Class: Vegetarian
## Sensitivity 1.0000
## Specificity 1.0000
## Pos Pred Value 1.0000
## Neg Pred Value 1.0000
## Prevalence 0.1111
## Detection Rate 0.1111
## Detection Prevalence 0.1111
## Balanced Accuracy 1.0000
# The stepwise model retains 90% accuracy with only 9 questions and a handful
# of demographic variables
## Diet~+Question30+Question31+Question32+Question33+Question34+Question35+Question36+Question37+Question38+Question40+Question41+Question43+Question44+Question45+Question46+Question49+Question50+Question51+Question52+Question53
Evan.ben.mlr <- nnet::multinom(Diet ~ Question30 + Question31 + Question32 + Question33 +
Question34 + Question35 + Question36 + Question37 + Question38 + Question40 +
Question41 + Question43 + Question44 + Question45 + Question46 + Question49 +
Question50 + Question51 + Question52 + Question53, data = iBenefits)
## # weights: 110 (84 variable)
## initial value 170.600419
## iter 10 value 106.011315
## iter 20 value 82.260018
## iter 30 value 72.283157
## iter 40 value 68.723215
## iter 50 value 66.125303
## iter 60 value 61.641560
## iter 70 value 54.983769
## iter 80 value 52.116577
## iter 90 value 50.270400
## iter 100 value 49.801088
## final value 49.801088
## stopped after 100 iterations
Evan.ben.mlr.pred <- predict(Evan.ben.mlr, iBenefits)
caret::confusionMatrix(Evan.ben.mlr.pred, iBenefits$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 38 6 1 0 0
## Flexitarian 3 15 4 0 1
## Pescetarian 1 3 6 0 0
## Vegetarian 0 0 0 10 0
## Vegan 2 0 1 0 15
##
## Overall Statistics
##
## Accuracy : 0.7925
## 95% CI : (0.7028, 0.8651)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : 0.000000000000002373
##
## Kappa : 0.7155
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.8636 0.6250 0.50000
## Specificity 0.8871 0.9024 0.95745
## Pos Pred Value 0.8444 0.6522 0.60000
## Neg Pred Value 0.9016 0.8916 0.93750
## Prevalence 0.4151 0.2264 0.11321
## Detection Rate 0.3585 0.1415 0.05660
## Detection Prevalence 0.4245 0.2170 0.09434
## Balanced Accuracy 0.8754 0.7637 0.72872
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 0.9375
## Specificity 1.00000 0.9667
## Pos Pred Value 1.00000 0.8333
## Neg Pred Value 1.00000 0.9886
## Prevalence 0.09434 0.1509
## Detection Rate 0.09434 0.1415
## Detection Prevalence 0.09434 0.1698
## Balanced Accuracy 1.00000 0.9521
Evan.ben.means <- vector("list", length(Ben.scales))
for (i in seq_along(Ben.scales)) {
Evan.ben.means[[i]] <- iBenefits %>% select(one_of(c("Diet", "Interest_Veganism",
names(Ben.scales[[i]])))) %>% group_by(Diet) %>% summarise_all(funs(mean))
}
Evan.ben.means <- do.call(left_join, args = list(x = Evan.ben.means[[1]], y = Evan.ben.means[[2]],
by = c("Diet", "Interest_Veganism")))
Evan.ben.means.mlr <- nnet::multinom(Diet ~ ., data = Evan.ben.means)
## # weights: 115 (88 variable)
## initial value 8.047190
## iter 10 value 0.040793
## iter 20 value 0.000274
## final value 0.000073
## converged
Evan.ben.means.mlr.pred <- predict(Evan.ben.means.mlr, iBenefits)
caret::confusionMatrix(Evan.ben.means.mlr.pred, iBenefits$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 13 2 1 1 0
## Flexitarian 15 16 3 4 0
## Pescetarian 8 4 7 0 0
## Vegetarian 7 1 1 3 3
## Vegan 1 1 0 2 13
##
## Overall Statistics
##
## Accuracy : 0.4906
## 95% CI : (0.3922, 0.5895)
## No Information Rate : 0.4151
## P-Value [Acc > NIR] : 0.07034
##
## Kappa : 0.3587
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.2955 0.6667 0.58333
## Specificity 0.9355 0.7317 0.87234
## Pos Pred Value 0.7647 0.4211 0.36842
## Neg Pred Value 0.6517 0.8824 0.94253
## Prevalence 0.4151 0.2264 0.11321
## Detection Rate 0.1226 0.1509 0.06604
## Detection Prevalence 0.1604 0.3585 0.17925
## Balanced Accuracy 0.6155 0.6992 0.72784
## Class: Vegetarian Class: Vegan
## Sensitivity 0.30000 0.8125
## Specificity 0.87500 0.9556
## Pos Pred Value 0.20000 0.7647
## Neg Pred Value 0.92308 0.9663
## Prevalence 0.09434 0.1509
## Detection Rate 0.02830 0.1226
## Detection Prevalence 0.14151 0.1604
## Balanced Accuracy 0.58750 0.8840
Evan.ben.items <- iBenefits %>% select(one_of(c("Diet", "Interest_Veganism", names(iBarriers)[29:36],
(lapply(Ben.scales, names) %>% unlist))))
Evan.ben.items.mlr <- nnet::multinom(Diet ~ ., data = Evan.ben.items)
## # weights: 155 (120 variable)
## initial value 168.990981
## iter 10 value 66.945646
## iter 20 value 44.317171
## iter 30 value 29.950262
## iter 40 value 21.708441
## iter 50 value 19.349112
## iter 60 value 18.350967
## iter 70 value 18.109316
## iter 80 value 17.873811
## iter 90 value 17.709807
## iter 100 value 17.680862
## final value 17.680862
## stopped after 100 iterations
Evan.ben.items.mlr.pred <- predict(Evan.ben.items.mlr, iBenefits)
caret::confusionMatrix(Evan.ben.items.mlr.pred, iBenefits$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 40 3 0 0 0
## Flexitarian 4 21 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 15
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8675, 0.9728)
## No Information Rate : 0.419
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.9089
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.8750 1.0000
## Specificity 0.9508 0.9506 1.0000
## Pos Pred Value 0.9302 0.8400 1.0000
## Neg Pred Value 0.9355 0.9625 1.0000
## Prevalence 0.4190 0.2286 0.1143
## Detection Rate 0.3810 0.2000 0.1143
## Detection Prevalence 0.4095 0.2381 0.1143
## Balanced Accuracy 0.9300 0.9128 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09524 0.1429
## Detection Rate 0.09524 0.1429
## Detection Prevalence 0.09524 0.1429
## Balanced Accuracy 1.00000 1.0000
# Evan.ben.items.step <- step(Evan.ben.items.mlr, direction = 'both', trace = F)
# Evan.ben.items.step %>% summary
Evan.ben.items.step.mlr <- nnet::multinom(formula = Diet ~ Interest_Veganism + Feminist +
SJA + Animal_Lover + Gender + Age_Cohort + Question30 + Question31 + Question32 +
Question35 + Question37 + Question38 + Question45 + Question50 + Question52 +
Question53, data = Evan.ben.items)
## # weights: 90 (68 variable)
## initial value 168.990981
## iter 10 value 84.754305
## iter 20 value 63.044781
## iter 30 value 49.854825
## iter 40 value 39.872971
## iter 50 value 33.059102
## iter 60 value 28.881446
## iter 70 value 26.457202
## iter 80 value 23.259939
## iter 90 value 23.238566
## iter 100 value 23.238436
## final value 23.238436
## stopped after 100 iterations
Evan.ben.items.step.mlr.pred <- predict(Evan.ben.items.step.mlr, Evan.ben.items)
caret::confusionMatrix(Evan.ben.items.step.mlr.pred, Evan.ben.items$Diet)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Omnivore Flexitarian Pescetarian Vegetarian Vegan
## Omnivore 40 5 0 0 0
## Flexitarian 4 19 0 0 0
## Pescetarian 0 0 12 0 0
## Vegetarian 0 0 0 10 0
## Vegan 0 0 0 0 15
##
## Overall Statistics
##
## Accuracy : 0.9143
## 95% CI : (0.8435, 0.9601)
## No Information Rate : 0.419
## P-Value [Acc > NIR] : < 0.00000000000000022
##
## Kappa : 0.8822
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Omnivore Class: Flexitarian Class: Pescetarian
## Sensitivity 0.9091 0.7917 1.0000
## Specificity 0.9180 0.9506 1.0000
## Pos Pred Value 0.8889 0.8261 1.0000
## Neg Pred Value 0.9333 0.9390 1.0000
## Prevalence 0.4190 0.2286 0.1143
## Detection Rate 0.3810 0.1810 0.1143
## Detection Prevalence 0.4286 0.2190 0.1143
## Balanced Accuracy 0.9136 0.8711 1.0000
## Class: Vegetarian Class: Vegan
## Sensitivity 1.00000 1.0000
## Specificity 1.00000 1.0000
## Pos Pred Value 1.00000 1.0000
## Neg Pred Value 1.00000 1.0000
## Prevalence 0.09524 0.1429
## Detection Rate 0.09524 0.1429
## Detection Prevalence 0.09524 0.1429
## Balanced Accuracy 1.00000 1.0000
# 91% accuracy with 10 questions and a handful of demographic variables. Not bad!
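# Caveat: every accuracy reported in this section is a resubstitution
# estimate. A hedged sketch of a simple hold-out check (illustrative names):
set.seed(1)
idx <- caret::createDataPartition(Evan.ben.items$Diet, p = 0.7, list = FALSE)
ben.holdout.mlr <- nnet::multinom(Diet ~ ., data = Evan.ben.items[idx, ], trace = FALSE)
caret::confusionMatrix(predict(ben.holdout.mlr, Evan.ben.items[-idx, ]),
    Evan.ben.items[-idx, ]$Diet)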