# Clase: Discriminación y Clasificación
library(readxl)
# Read the workbook into `datos`, show its first rows, convert the column `g`
# to a factor, and print the resulting table.
datos <- read_excel(path = "discriminantes_051923.xlsx")
head(datos, n = 6L)
datos$g <- as.factor(x = datos$g)
datos
library(rgl)
# 3-D scatter plot of every column except the fourth, using point type "s"
# and colouring each observation by the numeric value of `g`.
plot3d(x = datos[, -4], col = as.numeric(datos$g), type = "s")
library(ggplot2)
# One-dimensional density of each measured variable, one filled curve per
# level of `g`; alpha = 0.5 keeps overlapping curves visible.

# Variable DE
ggplot(data = datos, mapping = aes(x = DE, fill = g)) +
  geom_density(alpha = 0.5)

# Variable DL
ggplot(data = datos, mapping = aes(x = DL, fill = g)) +
  geom_density(alpha = 0.5)

# Variable BIO
ggplot(data = datos, mapping = aes(x = BIO, fill = g)) +
  geom_density(alpha = 0.5)

# Two-dimensional density contours for every pair of measured variables,
# with one colour per level of `g`.

# DE against DL
ggplot(data = datos, mapping = aes(x = DE, y = DL, colour = g)) +
  geom_density_2d()

# DE against BIO
ggplot(data = datos, mapping = aes(x = DE, y = BIO, colour = g)) +
  geom_density_2d()

# DL against BIO
ggplot(data = datos, mapping = aes(x = DL, y = BIO, colour = g)) +
  geom_density_2d()

# Random train/test split. Each row is independently assigned to the training
# set with probability 0.8, so the split sizes are random rather than an exact
# 80/20 cut. The seed is fixed so the split is reproducible.
set.seed(123)
# TRUE/FALSE are reserved words; T/F are ordinary variables that can be
# reassigned, so the reserved words are used here.
muest <- sample(
  x = c(TRUE, FALSE), size = nrow(datos),
  replace = TRUE, prob = c(0.8, 0.2)
)

# Select the label column by name instead of by position: the rest of the
# script reads it as `y_train$g`, so the name is what actually matters.
# `y_train` / `y_test` stay one-column tables, as the later code expects.
X_train <- datos[muest, names(datos) != "g"]
y_train <- datos[muest, "g"]
X_test <- datos[!muest, names(datos) != "g"]
y_test <- datos[!muest, "g"]
# Constants that enter the classification threshold as the ratio C12 / C21
# (presumably the misclassification costs c(1|2) and c(2|1) -- confirm).
C12 <- 1
C21 <- 2

# Relative frequency of each class.
# NOTE(review): these are computed from the full data set `datos`, not only
# from the training rows -- confirm that this is intended.
cont <- table(datos$g)
P1 <- cont["enferma"] / sum(cont)
P2 <- cont["sana"] / sum(cont)

# Training rows of each class.
train_enferma <- X_train[y_train$g == "enferma", ]
train_sana <- X_train[y_train$g == "sana", ]

# Per-class mean vectors, and their difference and sum.
mu1 <- colMeans(train_enferma)
mu2 <- colMeans(train_sana)
mu_d <- mu1 - mu2
mu_s <- mu1 + mu2

# Per-class sample covariance matrices and group sizes.
S1 <- cov(train_enferma)
S2 <- cov(train_sana)
n1 <- nrow(train_enferma)
n2 <- nrow(train_sana)

# Pooled covariance: each class covariance is weighted by its share of the
# total degrees of freedom; `Spi` is the inverse of the pooled matrix.
gl_total <- (n1 - 1) + (n2 - 1)
Sp <- ((n1 - 1) / gl_total) * S1 + ((n2 - 1) / gl_total) * S2
Spi <- solve(Sp)
# Classify the first test observation.
# Right-hand side of the rule: log of the constant ratio times the ratio of
# class proportions.
model_der <- log((C12 / C21) * (P2 / P1))

# Left-hand side: linear score of the observation minus the midpoint term.
X0 <- unlist(X_test[1, ])
disc_coef <- t(mu_d) %*% Spi
model_izq <- disc_coef %*% X0 - (1 / 2) * disc_coef %*% mu_s

# Assign to 'enferma' when the score reaches the threshold, else 'sana'.
# `model_izq` is a 1 x 1 matrix, so the printed result is a 1 x 1 matrix too.
ifelse(model_izq >= model_der, "enferma", "sana")
## [,1]
## [1,] "enferma"
# Classify every test observation at once.
# `part1` is a 1 x n matrix holding the linear score of each test row;
# `part2` is the 1 x 1 midpoint term, subtracted from every score.
part1 <- t(mu_d) %*% Spi %*% t(X_test)
part2 <- (1 / 2) * t(mu_d) %*% Spi %*% mu_s
mod_izq <- part1 - part2[1, 1]

# `model_der` carries the name "sana" (inherited from `cont['sana']`). Going
# through sapply() + ifelse() copied that name onto every prediction, so the
# printed vector looked as if each element were labelled "sana". Comparing
# plain unnamed vectors yields an unnamed character vector with the same
# values, and a single vectorised ifelse() replaces the type-unstable sapply().
y_pred <- ifelse(
  as.vector(mod_izq) >= as.numeric(model_der),
  "enferma", "sana"
)