El paquete CARET (Classification And REgression Training) es un paquete integral con una amplia variedad de algoritmos para el aprendizaje automático.
{r message=FALSE, warning=FALSE}
library(ggplot2)
library(lattice)
library(caret)
library(DataExplorer)
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:ggplot2':
##
## alpha
# Location of the survey CSV. The original absolute path remains the default,
# but it can be overridden with the M1_DATA_PATH environment variable so the
# script also runs on machines where that path does not exist.
ruta <- Sys.getenv(
  "M1_DATA_PATH",
  unset = "/Users/samanthagarcia/Desktop/M1_data.csv"
)
# Fail early with a clear message instead of read.csv()'s connection error.
if (!file.exists(ruta)) {
  stop("Data file not found: ", ruta, call. = FALSE)
}
df <- read.csv(ruta)
# Preview the first rows.
head(df)
## trust_apple interest_computers age_computer user_pcmac appleproducts_count
## 1 No 4 8 PC 0
## 2 Yes 2 4 PC 1
## 3 Yes 5 6 PC 0
## 4 Yes 2 6 Apple 4
## 5 Yes 4 4 Apple 7
## 6 Yes 3 1 Apple 2
## familiarity_m1 f_batterylife f_price f_size f_multitasking f_noise
## 1 No 5 4 3 4 4
## 2 No 5 5 5 3 4
## 3 No 3 4 2 4 1
## 4 No 4 3 3 4 4
## 5 Yes 5 3 3 4 4
## 6 No 5 5 4 4 5
## f_performance f_neural f_synergy f_performanceloss m1_consideration
## 1 2 2 1 1 1
## 2 5 2 2 4 2
## 3 4 2 2 2 4
## 4 4 4 4 3 2
## 5 5 3 4 4 4
## 6 5 5 4 2 2
## m1_purchase gender age_group income_group status domain
## 1 Yes Male 2 2 Student Science
## 2 No Male 2 3 Employed Finance
## 3 Yes Male 2 2 Student IT & Technology
## 4 No Female 2 2 Student Arts & Culture
## 5 Yes Male 5 7 Employed Hospitality
## 6 No Female 2 2 Student Politics
# Name of the response column: the last column of `df`.
# NOTE(review): an earlier `target <- "m1_purchase"` assignment was dead code
# (it was overwritten on the very next line) and has been removed. Confirm
# that the last column, not "m1_purchase", is the intended response.
target <- names(df)[ncol(df)]
# Store the response as a factor (categorical outcome).
df[[target]] <- as.factor(df[[target]])
# List the response classes.
levels(df[[target]])
## [1] "Administration & Public Services" "Agriculture"
## [3] "Arts & Culture" "Business"
## [5] "Communication " "Consulting "
## [7] "Economics" "Education"
## [9] "Engineering" "Finance"
## [11] "Healthcare" "Hospitality"
## [13] "IT & Technology" "Law"
## [15] "Logistics" "Marketing"
## [17] "Politics" "Realestate"
## [19] "Retail" "Retired"
## [21] "Science" "Social Sciences"
# Collapse sparse response classes: every level observed fewer than 5 times is
# relabelled "Other", then the response is rebuilt as a factor.
tabla <- table(df[[target]])
clases_pequenas <- names(tabla)[tabla < 5]
df[[target]] <- local({
  etiquetas <- as.character(df[[target]])
  etiquetas[etiquetas %in% clases_pequenas] <- "Other"
  as.factor(etiquetas)
})
# Class counts after lumping.
table(df[[target]])
##
## Arts & Culture Business Education Engineering Finance
## 6 14 5 7 7
## Hospitality IT & Technology Marketing Other Science
## 6 33 21 21 7
## Social Sciences
## 6
# Model formula "<target> ~ .": the response against every other column.
formula_modelo <- as.formula(sprintf("%s ~ .", target))
# Per-column summary of the data frame.
summary(df)
## trust_apple interest_computers age_computer user_pcmac
## Length:133 Min. :2.000 Min. :0.000 Length:133
## Class :character 1st Qu.:3.000 1st Qu.:1.000 Class :character
## Mode :character Median :4.000 Median :3.000 Mode :character
## Mean :3.812 Mean :2.827
## 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :5.000 Max. :9.000
##
## appleproducts_count familiarity_m1 f_batterylife f_price
## Min. :0.000 Length:133 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 Class :character 1st Qu.:4.000 1st Qu.:3.000
## Median :3.000 Mode :character Median :5.000 Median :4.000
## Mean :2.609 Mean :4.526 Mean :3.872
## 3rd Qu.:4.000 3rd Qu.:5.000 3rd Qu.:5.000
## Max. :8.000 Max. :5.000 Max. :5.000
##
## f_size f_multitasking f_noise f_performance f_neural
## Min. :1.000 Min. :2.00 Min. :1.000 Min. :2.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:4.00 1st Qu.:3.000 1st Qu.:4.000 1st Qu.:2.000
## Median :3.000 Median :4.00 Median :4.000 Median :5.000 Median :3.000
## Mean :3.158 Mean :4.12 Mean :3.729 Mean :4.398 Mean :3.165
## 3rd Qu.:4.000 3rd Qu.:5.00 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.:4.000
## Max. :5.000 Max. :5.00 Max. :5.000 Max. :5.000 Max. :5.000
##
## f_synergy f_performanceloss m1_consideration m1_purchase
## Min. :1.000 Min. :1.000 Min. :1.000 Length:133
## 1st Qu.:3.000 1st Qu.:3.000 1st Qu.:3.000 Class :character
## Median :4.000 Median :4.000 Median :4.000 Mode :character
## Mean :3.466 Mean :3.376 Mean :3.609
## 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:5.000
## Max. :5.000 Max. :5.000 Max. :5.000
##
## gender age_group income_group status
## Length:133 Min. : 1.00 Min. :1.00 Length:133
## Class :character 1st Qu.: 2.00 1st Qu.:1.00 Class :character
## Mode :character Median : 2.00 Median :2.00 Mode :character
## Mean : 2.97 Mean :2.97
## 3rd Qu.: 3.00 3rd Qu.:4.00
## Max. :10.00 Max. :7.00
##
## domain
## IT & Technology:33
## Marketing :21
## Other :21
## Business :14
## Engineering : 7
## Finance : 7
## (Other) :30
# Show the structure of the data frame: column types and first values.
str(df)
## 'data.frame': 133 obs. of 22 variables:
## $ trust_apple : chr "No" "Yes" "Yes" "Yes" ...
## $ interest_computers : int 4 2 5 2 4 3 3 3 4 5 ...
## $ age_computer : int 8 4 6 6 4 1 2 0 2 0 ...
## $ user_pcmac : chr "PC" "PC" "PC" "Apple" ...
## $ appleproducts_count: int 0 1 0 4 7 2 7 0 6 7 ...
## $ familiarity_m1 : chr "No" "No" "No" "No" ...
## $ f_batterylife : int 5 5 3 4 5 5 4 5 4 5 ...
## $ f_price : int 4 5 4 3 3 5 3 5 4 3 ...
## $ f_size : int 3 5 2 3 3 4 4 4 3 5 ...
## $ f_multitasking : int 4 3 4 4 4 4 5 4 4 5 ...
## $ f_noise : int 4 4 1 4 4 5 5 3 4 5 ...
## $ f_performance : int 2 5 4 4 5 5 5 3 4 5 ...
## $ f_neural : int 2 2 2 4 3 5 3 2 3 3 ...
## $ f_synergy : int 1 2 2 4 4 4 3 2 3 5 ...
## $ f_performanceloss : int 1 4 2 3 4 2 2 3 4 5 ...
## $ m1_consideration : int 1 2 4 2 4 2 3 1 5 5 ...
## $ m1_purchase : chr "Yes" "No" "Yes" "No" ...
## $ gender : chr "Male" "Male" "Male" "Female" ...
## $ age_group : int 2 2 2 2 5 2 6 2 8 4 ...
## $ income_group : int 2 3 2 2 7 2 7 2 7 6 ...
## $ status : chr "Student" "Employed" "Student" "Student" ...
## $ domain : Factor w/ 11 levels "Arts & Culture",..: 10 5 7 1 6 9 7 11 7 6 ...
# DataExplorer plot of missing values in `df`.
plot_missing(df)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the DataExplorer package.
## Please report the issue at
## <https://github.com/boxuancui/DataExplorer/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Exploratory plots (DataExplorer) on the full data frame.
plot_histogram(df)
plot_correlation(df)
# Correlation plot restricted to the numeric columns. vapply() guarantees a
# logical mask (sapply()'s return type depends on its input), and drop = FALSE
# keeps a data frame even if only one numeric column exists.
df_num <- df[, vapply(df, is.numeric, logical(1)), drop = FALSE]
plot_correlation(df_num)
# Turn every character column into a factor; other columns are left untouched.
df[] <- lapply(df, function(columna) {
  if (is.character(columna)) factor(columna) else columna
})
# Record the level set of each factor column (NULL for non-factor columns).
niveles_df <- lapply(df, function(x) {
  if (is.factor(x)) {
    levels(x)
  } else {
    NULL
  }
})
# Fixed seed so the partition below is reproducible.
set.seed(123)
# caret partition on the response column: 80% of the rows go to training,
# the remaining rows to the test set.
renglones_entrenamiento <- createDataPartition(
  y = df[[target]],
  p = 0.8,
  list = FALSE
)
entrenamiento <- df[renglones_entrenamiento, ]
prueba <- df[-renglones_entrenamiento, ]
# Re-declare each factor column in both subsets with the level set recorded
# from the full data, so training and test share identical levels.
for (col in names(df)[vapply(df, is.factor, logical(1))]) {
  entrenamiento[[col]] <- factor(
    entrenamiento[[col]],
    levels = niveles_df[[col]]
  )
  prueba[[col]] <- factor(
    prueba[[col]],
    levels = niveles_df[[col]]
  )
}
# Drop near-zero-variance columns, as detected on the training set, from both
# the training and the test data.
nzv <- nearZeroVar(entrenamiento)
# Never drop the response column, even if it is flagged: the models below
# need it in both subsets.
nzv <- setdiff(nzv, match(target, names(entrenamiento)))
if (length(nzv) > 0) {
  # drop = FALSE keeps a data frame even when a single column remains.
  entrenamiento <- entrenamiento[, -nzv, drop = FALSE]
  prueba <- prueba[, -nzv, drop = FALSE]
}
# Resampling scheme passed to the train() calls in this script:
# 10-fold cross-validation.
ctrl <- trainControl(method = "cv", number = 10)
# Print the name of the response column.
target
## [1] "domain"
# Inspect the response column of the training set.
str(entrenamiento[[target]])
## Factor w/ 11 levels "Arts & Culture",..: 10 5 7 6 7 11 6 11 7 7 ...
# List the response classes present as levels in the training set.
levels(entrenamiento[[target]])
## [1] "Arts & Culture" "Business" "Education" "Engineering"
## [5] "Finance" "Hospitality" "IT & Technology" "Marketing"
## [9] "Other" "Science" "Social Sciences"
# Model 1: caret "svmLinear" with the cost fixed at C = 1. Predictors are
# scaled and centered; resampling follows `ctrl`.
modelo1 <- train(
  formula_modelo,
  data = entrenamiento,
  method = "svmLinear",
  trControl = ctrl,
  preProcess = c("scale", "center"),
  tuneGrid = data.frame(C = 1)
)
# Predicted classes for the training rows and for the held-out rows.
pred_train1 <- predict(modelo1, newdata = entrenamiento)
pred_test1 <- predict(modelo1, newdata = prueba)
# Confusion matrices: predictions against the observed response.
mcre1 <- confusionMatrix(
  data = pred_train1,
  reference = entrenamiento[[target]]
)
mcrp1 <- confusionMatrix(
  data = pred_test1,
  reference = prueba[[target]]
)
# Training-set results.
mcre1
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 5 0 0 0 0
## Business 0 10 0 0 0
## Education 0 0 4 0 0
## Engineering 0 0 0 6 0
## Finance 0 0 0 0 6
## Hospitality 0 0 0 0 0
## IT & Technology 0 1 0 0 0
## Marketing 0 1 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 5 0 0 0 0
## IT & Technology 0 26 1 2 0
## Marketing 0 0 15 2 1
## Other 0 1 1 13 0
## Science 0 0 0 0 5
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 5
##
## Overall Statistics
##
## Accuracy : 0.9091
## 95% CI : (0.8392, 0.9555)
## No Information Rate : 0.2455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.8942
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 1.00000 0.83333 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 0.98000 1.00000
## Prevalence 0.04545 0.10909 0.03636
## Detection Rate 0.04545 0.09091 0.03636
## Detection Prevalence 0.04545 0.09091 0.03636
## Balanced Accuracy 1.00000 0.91667 1.00000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.05455 0.05455 0.04545
## Detection Rate 0.05455 0.05455 0.04545
## Detection Prevalence 0.05455 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 0.9630 0.8824 0.7647
## Specificity 0.9518 0.9570 0.9785
## Pos Pred Value 0.8667 0.7895 0.8667
## Neg Pred Value 0.9875 0.9780 0.9579
## Prevalence 0.2455 0.1545 0.1545
## Detection Rate 0.2364 0.1364 0.1182
## Detection Prevalence 0.2727 0.1727 0.1364
## Balanced Accuracy 0.9574 0.9197 0.8716
## Class: Science Class: Social Sciences
## Sensitivity 0.83333 1.00000
## Specificity 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000
## Neg Pred Value 0.99048 1.00000
## Prevalence 0.05455 0.04545
## Detection Rate 0.04545 0.04545
## Detection Prevalence 0.04545 0.04545
## Balanced Accuracy 0.91667 1.00000
# Test-set confusion matrix and statistics for model 1.
mcrp1
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 1
## Business 1 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 0 1 0 1 0
## Marketing 0 0 0 0 0
## Other 0 0 0 0 0
## Science 0 1 1 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 1 0 0 0
## Business 0 0 0 2 0
## Education 0 0 0 0 0
## Engineering 0 0 1 1 0
## Finance 0 0 0 0 0
## Hospitality 1 1 0 0 0
## IT & Technology 0 3 1 0 0
## Marketing 0 1 2 0 1
## Other 0 0 0 1 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 1
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.3043
## 95% CI : (0.1321, 0.5292)
## No Information Rate : 0.2609
## P-Value [Acc > NIR] : 0.3925
##
## Kappa : 0.1947
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 0.90909 0.80952 1.00000
## Pos Pred Value 0.00000 0.00000 NaN
## Neg Pred Value 0.95238 0.89474 0.95652
## Prevalence 0.04348 0.08696 0.04348
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.08696 0.17391 0.00000
## Balanced Accuracy 0.45455 0.40476 0.50000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 1.00000
## Specificity 0.90909 1.00000 0.95455
## Pos Pred Value 0.00000 NaN 0.50000
## Neg Pred Value 0.95238 0.95652 1.00000
## Prevalence 0.04348 0.04348 0.04348
## Detection Rate 0.00000 0.00000 0.04348
## Detection Prevalence 0.08696 0.00000 0.08696
## Balanced Accuracy 0.45455 0.50000 0.97727
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 0.5000 0.50000 0.25000
## Specificity 0.8235 0.89474 1.00000
## Pos Pred Value 0.5000 0.50000 1.00000
## Neg Pred Value 0.8235 0.89474 0.86364
## Prevalence 0.2609 0.17391 0.17391
## Detection Rate 0.1304 0.08696 0.04348
## Detection Prevalence 0.2609 0.17391 0.04348
## Balanced Accuracy 0.6618 0.69737 0.62500
## Class: Science Class: Social Sciences
## Sensitivity 0.00000 0.00000
## Specificity 0.90909 1.00000
## Pos Pred Value 0.00000 NaN
## Neg Pred Value 0.95238 0.95652
## Prevalence 0.04348 0.04348
## Detection Rate 0.00000 0.00000
## Detection Prevalence 0.08696 0.00000
## Balanced Accuracy 0.45455 0.50000
# Model 2: caret "svmRadial" with fixed tuning values sigma = 0.1 and C = 1.
# Predictors are scaled and centered; resampling follows `ctrl`.
modelo2 <- train(
  formula_modelo,
  data = entrenamiento,
  method = "svmRadial",
  trControl = ctrl,
  preProcess = c("scale", "center"),
  tuneGrid = data.frame(sigma = 0.1, C = 1)
)
# Predicted classes for the training rows and for the held-out rows.
resultado_entrenamiento2 <- predict(modelo2, newdata = entrenamiento)
resultado_prueba2 <- predict(modelo2, newdata = prueba)
# Training-set confusion matrix: predictions against the observed response.
mcre2 <- confusionMatrix(
  data = resultado_entrenamiento2,
  reference = entrenamiento[[target]]
)
mcre2
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 4 0 0 0 0
## Business 0 10 0 0 0
## Education 0 0 4 0 0
## Engineering 0 0 0 5 0
## Finance 0 0 0 0 6
## Hospitality 0 0 0 0 0
## IT & Technology 1 2 0 1 0
## Marketing 0 0 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 5 0 0 0 0
## IT & Technology 0 27 0 0 1
## Marketing 0 0 17 0 0
## Other 0 0 0 17 1
## Science 0 0 0 0 4
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 5
##
## Overall Statistics
##
## Accuracy : 0.9455
## 95% CI : (0.8851, 0.9797)
## No Information Rate : 0.2455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9362
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.80000 0.83333 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 0.99057 0.98000 1.00000
## Prevalence 0.04545 0.10909 0.03636
## Detection Rate 0.03636 0.09091 0.03636
## Detection Prevalence 0.03636 0.09091 0.03636
## Balanced Accuracy 0.90000 0.91667 1.00000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.83333 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 0.99048 1.00000 1.00000
## Prevalence 0.05455 0.05455 0.04545
## Detection Rate 0.04545 0.05455 0.04545
## Detection Prevalence 0.04545 0.05455 0.04545
## Balanced Accuracy 0.91667 1.00000 1.00000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 0.9398 1.0000 0.9892
## Pos Pred Value 0.8438 1.0000 0.9444
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.2455 0.1545 0.1545
## Detection Rate 0.2455 0.1545 0.1545
## Detection Prevalence 0.2909 0.1545 0.1636
## Balanced Accuracy 0.9699 1.0000 0.9946
## Class: Science Class: Social Sciences
## Sensitivity 0.66667 1.00000
## Specificity 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000
## Neg Pred Value 0.98113 1.00000
## Prevalence 0.05455 0.04545
## Detection Rate 0.03636 0.04545
## Detection Prevalence 0.03636 0.04545
## Balanced Accuracy 0.83333 1.00000
# Test-set confusion matrix for model 2: predictions against the observed
# response of the held-out rows.
mcrp2 <- confusionMatrix(
  data = resultado_prueba2,
  reference = prueba[[target]]
)
mcrp2
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 0 1 1 1 1
## Marketing 1 1 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 1 0 0 0 0
## IT & Technology 0 6 1 4 1
## Marketing 0 0 3 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 1
## Marketing 0
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.4348
## 95% CI : (0.2319, 0.6551)
## No Information Rate : 0.2609
## P-Value [Acc > NIR] : 0.05323
##
## Kappa : 0.2635
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value NaN NaN NaN
## Neg Pred Value 0.95652 0.91304 0.95652
## Prevalence 0.04348 0.08696 0.04348
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000 0.50000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value NaN NaN 1.00000
## Neg Pred Value 0.95652 0.95652 1.00000
## Prevalence 0.04348 0.04348 0.04348
## Detection Rate 0.00000 0.00000 0.04348
## Detection Prevalence 0.00000 0.00000 0.04348
## Balanced Accuracy 0.50000 0.50000 1.00000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 1.0000 0.7500 0.0000
## Specificity 0.3529 0.8947 1.0000
## Pos Pred Value 0.3529 0.6000 NaN
## Neg Pred Value 1.0000 0.9444 0.8261
## Prevalence 0.2609 0.1739 0.1739
## Detection Rate 0.2609 0.1304 0.0000
## Detection Prevalence 0.7391 0.2174 0.0000
## Balanced Accuracy 0.6765 0.8224 0.5000
## Class: Science Class: Social Sciences
## Sensitivity 0.00000 0.00000
## Specificity 1.00000 1.00000
## Pos Pred Value NaN NaN
## Neg Pred Value 0.95652 0.95652
## Prevalence 0.04348 0.04348
## Detection Rate 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000
# Model 3: caret "svmPoly" with fixed tuning values degree = 2, scale = 1 and
# C = 1. Predictors are scaled and centered; resampling follows `ctrl`.
modelo3 <- train(
  formula_modelo,
  data = entrenamiento,
  method = "svmPoly",
  trControl = ctrl,
  preProcess = c("scale", "center"),
  tuneGrid = data.frame(degree = 2, scale = 1, C = 1)
)
# Predicted classes for the training rows and for the held-out rows.
resultado_entrenamiento3 <- predict(modelo3, newdata = entrenamiento)
resultado_prueba3 <- predict(modelo3, newdata = prueba)
# Training-set confusion matrix: predictions against the observed response.
mcre3 <- confusionMatrix(
  data = resultado_entrenamiento3,
  reference = entrenamiento[[target]]
)
mcre3
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 5 0 0 0 0
## Business 0 12 0 0 0
## Education 0 0 4 0 0
## Engineering 0 0 0 6 0
## Finance 0 0 0 0 6
## Hospitality 0 0 0 0 0
## IT & Technology 0 0 0 0 0
## Marketing 0 0 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 5 0 0 0 0
## IT & Technology 0 27 0 0 0
## Marketing 0 0 17 0 0
## Other 0 0 0 17 0
## Science 0 0 0 0 6
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 5
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.967, 1)
## No Information Rate : 0.2455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 1.00000 1.0000 1.00000
## Specificity 1.00000 1.0000 1.00000
## Pos Pred Value 1.00000 1.0000 1.00000
## Neg Pred Value 1.00000 1.0000 1.00000
## Prevalence 0.04545 0.1091 0.03636
## Detection Rate 0.04545 0.1091 0.03636
## Detection Prevalence 0.04545 0.1091 0.03636
## Balanced Accuracy 1.00000 1.0000 1.00000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.05455 0.05455 0.04545
## Detection Rate 0.05455 0.05455 0.04545
## Detection Prevalence 0.05455 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.2455 0.1545 0.1545
## Detection Rate 0.2455 0.1545 0.1545
## Detection Prevalence 0.2455 0.1545 0.1545
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Science Class: Social Sciences
## Sensitivity 1.00000 1.00000
## Specificity 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000
## Prevalence 0.05455 0.04545
## Detection Rate 0.05455 0.04545
## Detection Prevalence 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000
# Test-set confusion matrix for model 3: predictions against the observed
# response of the held-out rows.
mcrp3 <- confusionMatrix(
  data = resultado_prueba3,
  reference = prueba[[target]]
)
mcrp3
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 1
## Business 1 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 0 1 1 1 0
## Marketing 0 1 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 1 0 0 0
## Business 0 1 0 0 0
## Education 0 0 0 1 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 1 1 0 0 0
## IT & Technology 0 2 0 0 0
## Marketing 0 0 4 0 1
## Other 0 1 0 2 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 1 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 1
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.3913
## 95% CI : (0.1971, 0.6146)
## No Information Rate : 0.2609
## P-Value [Acc > NIR] : 0.1196
##
## Kappa : 0.286
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 0.90909 0.85714 0.95455
## Pos Pred Value 0.00000 0.00000 0.00000
## Neg Pred Value 0.95238 0.90000 0.95455
## Prevalence 0.04348 0.08696 0.04348
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.08696 0.13043 0.04348
## Balanced Accuracy 0.45455 0.42857 0.47727
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 1.00000
## Specificity 1.00000 1.00000 0.95455
## Pos Pred Value NaN NaN 0.50000
## Neg Pred Value 0.95652 0.95652 1.00000
## Prevalence 0.04348 0.04348 0.04348
## Detection Rate 0.00000 0.00000 0.04348
## Detection Prevalence 0.00000 0.00000 0.08696
## Balanced Accuracy 0.50000 0.50000 0.97727
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 0.33333 1.0000 0.50000
## Specificity 0.82353 0.8947 0.94737
## Pos Pred Value 0.40000 0.6667 0.66667
## Neg Pred Value 0.77778 1.0000 0.90000
## Prevalence 0.26087 0.1739 0.17391
## Detection Rate 0.08696 0.1739 0.08696
## Detection Prevalence 0.21739 0.2609 0.13043
## Balanced Accuracy 0.57843 0.9474 0.72368
## Class: Science Class: Social Sciences
## Sensitivity 0.00000 0.00000
## Specificity 1.00000 0.95455
## Pos Pred Value NaN 0.00000
## Neg Pred Value 0.95652 0.95455
## Prevalence 0.04348 0.04348
## Detection Rate 0.00000 0.00000
## Detection Prevalence 0.00000 0.04348
## Balanced Accuracy 0.50000 0.47727
# Model 4: caret "rpart", letting caret evaluate 10 candidate tuning values
# (tuneLength = 10). Predictors are scaled and centered; resampling follows
# `ctrl`.
modelo4 <- train(
  formula_modelo,
  data = entrenamiento,
  method = "rpart",
  trControl = ctrl,
  preProcess = c("scale", "center"),
  tuneLength = 10
)
# Predicted classes for the training rows and for the held-out rows.
resultado_entrenamiento4 <- predict(modelo4, newdata = entrenamiento)
resultado_prueba4 <- predict(modelo4, newdata = prueba)
# Training-set confusion matrix: predictions against the observed response.
mcre4 <- confusionMatrix(
  data = resultado_entrenamiento4,
  reference = entrenamiento[[target]]
)
mcre4
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 3 7 3 4 2
## Marketing 2 5 1 2 4
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 3 24 8 14 2
## Marketing 2 3 9 3 4
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 2
## Marketing 3
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.3
## 95% CI : (0.2163, 0.3948)
## No Information Rate : 0.2455
## P-Value [Acc > NIR] : 0.113
##
## Kappa : 0.1094
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.0000 0.00000
## Specificity 1.00000 1.0000 1.00000
## Pos Pred Value NaN NaN NaN
## Neg Pred Value 0.95455 0.8909 0.96364
## Prevalence 0.04545 0.1091 0.03636
## Detection Rate 0.00000 0.0000 0.00000
## Detection Prevalence 0.00000 0.0000 0.00000
## Balanced Accuracy 0.50000 0.5000 0.50000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value NaN NaN NaN
## Neg Pred Value 0.94545 0.94545 0.95455
## Prevalence 0.05455 0.05455 0.04545
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000 0.50000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 0.8889 0.52941 0.0000
## Specificity 0.4217 0.68817 1.0000
## Pos Pred Value 0.3333 0.23684 NaN
## Neg Pred Value 0.9211 0.88889 0.8455
## Prevalence 0.2455 0.15455 0.1545
## Detection Rate 0.2182 0.08182 0.0000
## Detection Prevalence 0.6545 0.34545 0.0000
## Balanced Accuracy 0.6553 0.60879 0.5000
## Class: Science Class: Social Sciences
## Sensitivity 0.00000 0.00000
## Specificity 1.00000 1.00000
## Pos Pred Value NaN NaN
## Neg Pred Value 0.94545 0.95455
## Prevalence 0.05455 0.04545
## Detection Rate 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000
# Test-set confusion matrix for model 4: predictions against the observed
# response of the held-out rows.
mcrp4 <- confusionMatrix(
  data = resultado_prueba4,
  reference = prueba[[target]]
)
mcrp4
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 0 1 1 1 1
## Marketing 1 1 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 1 6 2 2 0
## Marketing 0 0 2 2 1
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 1
## Marketing 0
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.3478
## 95% CI : (0.1638, 0.5727)
## No Information Rate : 0.2609
## P-Value [Acc > NIR] : 0.2325
##
## Kappa : 0.1481
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value NaN NaN NaN
## Neg Pred Value 0.95652 0.91304 0.95652
## Prevalence 0.04348 0.08696 0.04348
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000 0.50000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value NaN NaN NaN
## Neg Pred Value 0.95652 0.95652 0.95652
## Prevalence 0.04348 0.04348 0.04348
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000 0.50000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 1.0000 0.50000 0.0000
## Specificity 0.4118 0.73684 1.0000
## Pos Pred Value 0.3750 0.28571 NaN
## Neg Pred Value 1.0000 0.87500 0.8261
## Prevalence 0.2609 0.17391 0.1739
## Detection Rate 0.2609 0.08696 0.0000
## Detection Prevalence 0.6957 0.30435 0.0000
## Balanced Accuracy 0.7059 0.61842 0.5000
## Class: Science Class: Social Sciences
## Sensitivity 0.00000 0.00000
## Specificity 1.00000 1.00000
## Pos Pred Value NaN NaN
## Neg Pred Value 0.95652 0.95652
## Prevalence 0.04348 0.04348
## Detection Rate 0.00000 0.00000
## Detection Prevalence 0.00000 0.00000
## Balanced Accuracy 0.50000 0.50000
# Model 5: caret "nnet" with tuneLength = 5; trace = FALSE is forwarded to the
# fitting function to silence its iteration log. Predictors are scaled and
# centered; resampling follows `ctrl`.
modelo5 <- train(
  formula_modelo,
  data = entrenamiento,
  method = "nnet",
  trControl = ctrl,
  preProcess = c("scale", "center"),
  tuneLength = 5,
  trace = FALSE
)
# Predicted classes for the training rows and for the held-out rows.
resultado_entrenamiento5 <- predict(modelo5, newdata = entrenamiento)
resultado_prueba5 <- predict(modelo5, newdata = prueba)
# Training-set confusion matrix: predictions against the observed response.
mcre5 <- confusionMatrix(
  data = resultado_entrenamiento5,
  reference = entrenamiento[[target]]
)
mcre5
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 5 0 0 0 0
## Business 0 12 0 0 0
## Education 0 0 4 0 0
## Engineering 0 0 0 6 0
## Finance 0 0 0 0 6
## Hospitality 0 0 0 0 0
## IT & Technology 0 0 0 0 0
## Marketing 0 0 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 5 0 0 0 0
## IT & Technology 0 27 0 0 0
## Marketing 0 0 17 0 0
## Other 0 0 0 17 0
## Science 0 0 0 0 6
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 5
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.967, 1)
## No Information Rate : 0.2455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 1.00000 1.0000 1.00000
## Specificity 1.00000 1.0000 1.00000
## Pos Pred Value 1.00000 1.0000 1.00000
## Neg Pred Value 1.00000 1.0000 1.00000
## Prevalence 0.04545 0.1091 0.03636
## Detection Rate 0.04545 0.1091 0.03636
## Detection Prevalence 0.04545 0.1091 0.03636
## Balanced Accuracy 1.00000 1.0000 1.00000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.05455 0.05455 0.04545
## Detection Rate 0.05455 0.05455 0.04545
## Detection Prevalence 0.05455 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.2455 0.1545 0.1545
## Detection Rate 0.2455 0.1545 0.1545
## Detection Prevalence 0.2455 0.1545 0.1545
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Science Class: Social Sciences
## Sensitivity 1.00000 1.00000
## Specificity 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000
## Prevalence 0.05455 0.04545
## Detection Rate 0.05455 0.04545
## Detection Prevalence 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000
# Confusion matrix for model 5 on the held-out partition `prueba`.
# `resultado_prueba5` is defined outside this section; presumably it holds
# the model-5 predictions for `prueba` (by analogy with model 6) -- confirm.
# The outer parentheses make the assigned object print as well.
(mcrp5 <- confusionMatrix(
  data = resultado_prueba5,
  reference = prueba[[target]]
))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 1
## Business 0 1 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 1 0 0 0
## IT & Technology 1 0 0 1 0
## Marketing 0 0 0 0 0
## Other 0 0 0 0 0
## Science 0 0 1 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 1 0 0 0
## Engineering 0 1 0 0 0
## Finance 0 0 1 0 1
## Hospitality 1 0 0 0 0
## IT & Technology 0 2 1 0 0
## Marketing 0 0 2 1 0
## Other 0 1 0 3 0
## Science 0 1 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 1
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.3913
## 95% CI : (0.1971, 0.6146)
## No Information Rate : 0.2609
## P-Value [Acc > NIR] : 0.1196
##
## Kappa : 0.2985
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.50000 0.00000
## Specificity 0.95455 1.00000 0.95455
## Pos Pred Value 0.00000 1.00000 0.00000
## Neg Pred Value 0.95455 0.95455 0.95455
## Prevalence 0.04348 0.08696 0.04348
## Detection Rate 0.00000 0.04348 0.00000
## Detection Prevalence 0.04348 0.04348 0.04348
## Balanced Accuracy 0.47727 0.75000 0.47727
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 1.00000
## Specificity 0.95455 0.86364 0.95455
## Pos Pred Value 0.00000 0.00000 0.50000
## Neg Pred Value 0.95455 0.95000 1.00000
## Prevalence 0.04348 0.04348 0.04348
## Detection Rate 0.00000 0.00000 0.04348
## Detection Prevalence 0.04348 0.13043 0.08696
## Balanced Accuracy 0.47727 0.43182 0.97727
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 0.33333 0.50000 0.7500
## Specificity 0.82353 0.94737 0.9474
## Pos Pred Value 0.40000 0.66667 0.7500
## Neg Pred Value 0.77778 0.90000 0.9474
## Prevalence 0.26087 0.17391 0.1739
## Detection Rate 0.08696 0.08696 0.1304
## Detection Prevalence 0.21739 0.13043 0.1739
## Balanced Accuracy 0.57843 0.72368 0.8487
## Class: Science Class: Social Sciences
## Sensitivity 0.00000 0.00000
## Specificity 0.90909 1.00000
## Pos Pred Value 0.00000 NaN
## Neg Pred Value 0.95238 0.95652
## Prevalence 0.04348 0.04348
## Detection Rate 0.00000 0.00000
## Detection Prevalence 0.08696 0.00000
## Balanced Accuracy 0.45455 0.50000
# Model 6: random forest fitted through caret, trying mtry = 2, 4 and 6.
# The resampling settings (`ctrl`) and the model formula (`formula_modelo`)
# are both defined outside this section.
modelo6 <- train(
  formula_modelo,
  data = entrenamiento,
  method = "rf",
  tuneGrid = expand.grid(mtry = c(2, 4, 6)),
  trControl = ctrl,
  preProcess = c("scale", "center")
)

# Predictions for the training partition first, then for the test partition.
resultado_entrenamiento6 <- predict(modelo6, newdata = entrenamiento)
resultado_prueba6 <- predict(modelo6, newdata = prueba)

# Confusion matrix on the training data; the outer parentheses make the
# assigned object print as well.
# NOTE(review): the recorded output reports accuracy 1 here versus roughly
# 0.35 on the test partition, which points to overfitting -- worth checking.
(mcre6 <- confusionMatrix(
  data = resultado_entrenamiento6,
  reference = entrenamiento[[target]]
))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 5 0 0 0 0
## Business 0 12 0 0 0
## Education 0 0 4 0 0
## Engineering 0 0 0 6 0
## Finance 0 0 0 0 6
## Hospitality 0 0 0 0 0
## IT & Technology 0 0 0 0 0
## Marketing 0 0 0 0 0
## Other 0 0 0 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 0 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 5 0 0 0 0
## IT & Technology 0 27 0 0 0
## Marketing 0 0 17 0 0
## Other 0 0 0 17 0
## Science 0 0 0 0 6
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 0
## Marketing 0
## Other 0
## Science 0
## Social Sciences 5
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.967, 1)
## No Information Rate : 0.2455
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 1.00000 1.0000 1.00000
## Specificity 1.00000 1.0000 1.00000
## Pos Pred Value 1.00000 1.0000 1.00000
## Neg Pred Value 1.00000 1.0000 1.00000
## Prevalence 0.04545 0.1091 0.03636
## Detection Rate 0.04545 0.1091 0.03636
## Detection Prevalence 0.04545 0.1091 0.03636
## Balanced Accuracy 1.00000 1.0000 1.00000
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 1.00000 1.00000 1.00000
## Specificity 1.00000 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000 1.00000
## Prevalence 0.05455 0.05455 0.04545
## Detection Rate 0.05455 0.05455 0.04545
## Detection Prevalence 0.05455 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000 1.00000
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.2455 0.1545 0.1545
## Detection Rate 0.2455 0.1545 0.1545
## Detection Prevalence 0.2455 0.1545 0.1545
## Balanced Accuracy 1.0000 1.0000 1.0000
## Class: Science Class: Social Sciences
## Sensitivity 1.00000 1.00000
## Specificity 1.00000 1.00000
## Pos Pred Value 1.00000 1.00000
## Neg Pred Value 1.00000 1.00000
## Prevalence 0.05455 0.04545
## Detection Rate 0.05455 0.04545
## Detection Prevalence 0.05455 0.04545
## Balanced Accuracy 1.00000 1.00000
# Confusion matrix for model 6 on the held-out partition `prueba`,
# comparing the predicted classes with the observed target column.
# The outer parentheses make the assigned object print as well.
(mcrp6 <- confusionMatrix(
  data = resultado_prueba6,
  reference = prueba[[target]]
))
## Confusion Matrix and Statistics
##
## Reference
## Prediction Arts & Culture Business Education Engineering Finance
## Arts & Culture 0 0 0 0 1
## Business 0 0 0 0 0
## Education 0 0 0 0 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 0 0 0 0 0
## IT & Technology 0 1 0 1 0
## Marketing 0 1 0 0 0
## Other 1 0 1 0 0
## Science 0 0 0 0 0
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Hospitality IT & Technology Marketing Other Science
## Arts & Culture 0 1 0 0 0
## Business 0 0 0 0 0
## Education 0 0 0 1 0
## Engineering 0 0 0 0 0
## Finance 0 0 0 0 0
## Hospitality 1 1 0 1 0
## IT & Technology 0 4 2 1 0
## Marketing 0 0 2 1 0
## Other 0 0 0 0 0
## Science 0 0 0 0 1
## Social Sciences 0 0 0 0 0
## Reference
## Prediction Social Sciences
## Arts & Culture 0
## Business 0
## Education 0
## Engineering 0
## Finance 0
## Hospitality 0
## IT & Technology 1
## Marketing 0
## Other 0
## Science 0
## Social Sciences 0
##
## Overall Statistics
##
## Accuracy : 0.3478
## 95% CI : (0.1638, 0.5727)
## No Information Rate : 0.2609
## P-Value [Acc > NIR] : 0.2325
##
## Kappa : 0.2123
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Arts & Culture Class: Business Class: Education
## Sensitivity 0.00000 0.00000 0.00000
## Specificity 0.90909 1.00000 0.95455
## Pos Pred Value 0.00000 NaN 0.00000
## Neg Pred Value 0.95238 0.91304 0.95455
## Prevalence 0.04348 0.08696 0.04348
## Detection Rate 0.00000 0.00000 0.00000
## Detection Prevalence 0.08696 0.00000 0.04348
## Balanced Accuracy 0.45455 0.50000 0.47727
## Class: Engineering Class: Finance Class: Hospitality
## Sensitivity 0.00000 0.00000 1.00000
## Specificity 1.00000 1.00000 0.90909
## Pos Pred Value NaN NaN 0.33333
## Neg Pred Value 0.95652 0.95652 1.00000
## Prevalence 0.04348 0.04348 0.04348
## Detection Rate 0.00000 0.00000 0.04348
## Detection Prevalence 0.00000 0.00000 0.13043
## Balanced Accuracy 0.50000 0.50000 0.95455
## Class: IT & Technology Class: Marketing Class: Other
## Sensitivity 0.6667 0.50000 0.00000
## Specificity 0.6471 0.89474 0.89474
## Pos Pred Value 0.4000 0.50000 0.00000
## Neg Pred Value 0.8462 0.89474 0.80952
## Prevalence 0.2609 0.17391 0.17391
## Detection Rate 0.1739 0.08696 0.00000
## Detection Prevalence 0.4348 0.17391 0.08696
## Balanced Accuracy 0.6569 0.69737 0.44737
## Class: Science Class: Social Sciences
## Sensitivity 1.00000 0.00000
## Specificity 1.00000 1.00000
## Pos Pred Value 1.00000 NaN
## Neg Pred Value 1.00000 0.95652
## Prevalence 0.04348 0.04348
## Detection Rate 0.04348 0.00000
## Detection Prevalence 0.04348 0.00000
## Balanced Accuracy 1.00000 0.50000
# Count how many times each class appears in `pred_test1`.
# `pred_test1` is defined outside this section; presumably the test-set
# predictions of the first model -- confirm against its definition.
# The explicit `dnn` keeps the same header the bare call would produce.
table(pred_test1, dnn = "pred_test1")
## pred_test1
## Arts & Culture Business Education Engineering Finance
## 2 4 0 2 0
## Hospitality IT & Technology Marketing Other Science
## 2 6 4 1 2
## Social Sciences
## 0