#1. Guess Algorithm (variável resposta: categórica; variável explicativa: numérica (1)) - Só importa a variável resposta neste algoritmo
# Algoritmo que prevê "Male" com probabilidade p e "Female" com probabilidade 1 - p. O treino deste algoritmo consiste em otimizar p, isto é, encontrar o valor de p que retorna maior accuracy no conjunto de treino.
##1.Exportação de Dados
# define the outcome and predictors
# Load the heights data set and pull out the outcome (sex) and the single
# numeric predictor (height).
data("heights")
y <- heights[["sex"]]
x <- heights[["height"]]
# Train/test split: half of the rows, drawn by createDataPartition() on the
# outcome, form the test set; the remaining rows form the training set.
# The seed is fixed so the split is reproducible.
set.seed(2)
test_index <- createDataPartition(y = y, times = 1, p = 0.5, list = FALSE)
test_set <- heights[test_index, ]
train_set <- heights[-test_index, ]
# Preview the first rows of the test set.
head(test_set, n = 6L)
## sex height
## 3 Male 68
## 4 Male 74
## 5 Male 61
## 6 Female 65
## 8 Female 62
## 13 Male 69
##2.Análise de dados
# Pairwise plot matrix of the two columns (sex, height), coloured by sex.
ggpairs(
  data = heights,
  mapping = ggplot2::aes(colour = sex),
  columns = 1:2
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
##3.Guess Algorithm
# guess the outcome
# Guess algorithm: predict "Male" with probability p and "Female" with
# probability 1 - p, ignoring height. "Training" is a grid search over p for
# the value with the highest accuracy on the training set.
pp <- seq(0, 1, 0.001)
# Size the simulated predictions from the set they are scored against.
# Using length(test_index) here only works when the split is exactly 50/50;
# otherwise `==` would silently recycle against train_set$sex.
n_train <- nrow(train_set)
accuracy <- map_df(pp, function(p) {
  y_hat <- sample(
    c("Male", "Female"), n_train,
    replace = TRUE, prob = c(p, 1 - p)
  ) %>%
    factor(levels = levels(train_set$sex))
  # One row per candidate p; the column name `accura` is kept as-is.
  tibble(accura = mean(y_hat == train_set$sex))
})
# NOTE: each accuracy comes from a single random draw, so the arg-max is
# noisy; which.max() returns the first maximum when there are ties.
p <- pp[which.max(accuracy$accura)]
p
## [1] 0.999
# Score the tuned guess on the held-out test set: draw one random label per
# test row using the selected p, then report accuracy and the confusion
# matrix against the true labels.
n <- length(test_index)
y_hat <- factor(
  sample(c("Male", "Female"), n, replace = TRUE, prob = c(p, 1 - p)),
  levels = levels(test_set$sex)
)
mean(y_hat == test_set$sex)
## [1] 0.7733333
confusionMatrix(data = y_hat, reference = test_set$sex)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Female Male
## Female 0 0
## Male 119 406
##
## Accuracy : 0.7733
## 95% CI : (0.7351, 0.8085)
## No Information Rate : 0.7733
## P-Value [Acc > NIR] : 0.5246
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.7733
## Prevalence : 0.2267
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : Female
##